1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
53 #include "tm-constrs.h"
/* Forward declaration so the processor cost tables below can precede the
   definition.  NOTE(review): the meaning of the bool argument is not visible
   in this chunk -- presumably selects between cost categories; confirm at
   the definition site.  */
56 static int x86_builtin_vectorization_cost (bool);
/* Default stack-limit value used when the target headers do not provide
   one; (-1) is the "no explicit limit" sentinel.  The guard's #endif was
   missing, which would swallow the rest of the file into the #ifndef.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables: 0..3 for
   QI/HI/SI/DImode, 4 for anything else ("other").  The original text ended
   in a dangling backslash continuation with no default arm, leaving the
   conditional expression unterminated; restore the ": 4)" catch-all so the
   macro matches the five-entry cost arrays below.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
70 /* Processor costs (relative to an add) */
/* Size-tuning analogue of COSTS_N_INSNS: costs are measured in bytes of
   encoding rather than instruction counts.  COSTS_N_INSNS is assumed to be
   (N)*4, and an addition encodes in 2 bytes, so N "adds" cost 2*N bytes.  */
#define COSTS_N_BYTES(N) (2 * (N))
/* Placeholder stringop-strategy entry used to fill the unused slot of the
   memcpy/memset algorithm pairs in the cost tables below: a single
   {-1, libcall} terminator, i.e. fall back to a library call for every
   block size.  */
74 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
77 struct processor_costs size_cost = { /* costs for tuning for size */
78 COSTS_N_BYTES (2), /* cost of an add instruction */
79 COSTS_N_BYTES (3), /* cost of a lea instruction */
80 COSTS_N_BYTES (2), /* variable shift costs */
81 COSTS_N_BYTES (3), /* constant shift costs */
82 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 0, /* cost of multiply per each bit set */
88 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
89 COSTS_N_BYTES (3), /* HI */
90 COSTS_N_BYTES (3), /* SI */
91 COSTS_N_BYTES (3), /* DI */
92 COSTS_N_BYTES (5)}, /* other */
93 COSTS_N_BYTES (3), /* cost of movsx */
94 COSTS_N_BYTES (3), /* cost of movzx */
97 2, /* cost for loading QImode using movzbl */
98 {2, 2, 2}, /* cost of loading integer registers
99 in QImode, HImode and SImode.
100 Relative to reg-reg move (2). */
101 {2, 2, 2}, /* cost of storing integer registers */
102 2, /* cost of reg,reg fld/fst */
103 {2, 2, 2}, /* cost of loading fp registers
104 in SFmode, DFmode and XFmode */
105 {2, 2, 2}, /* cost of storing fp registers
106 in SFmode, DFmode and XFmode */
107 3, /* cost of moving MMX register */
108 {3, 3}, /* cost of loading MMX registers
109 in SImode and DImode */
110 {3, 3}, /* cost of storing MMX registers
111 in SImode and DImode */
112 3, /* cost of moving SSE register */
113 {3, 3, 3}, /* cost of loading SSE registers
114 in SImode, DImode and TImode */
115 {3, 3, 3}, /* cost of storing SSE registers
116 in SImode, DImode and TImode */
117 3, /* MMX or SSE register to integer */
118 0, /* size of l1 cache */
119 0, /* size of l2 cache */
120 0, /* size of prefetch block */
121 0, /* number of parallel prefetches */
123 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
124 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
125 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
126 COSTS_N_BYTES (2), /* cost of FABS instruction. */
127 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
128 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
129 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 1, /* scalar_stmt_cost. */
134 1, /* scalar load_cost. */
135 1, /* scalar_store_cost. */
136 1, /* vec_stmt_cost. */
137 1, /* vec_to_scalar_cost. */
138 1, /* scalar_to_vec_cost. */
139 1, /* vec_align_load_cost. */
140 1, /* vec_unalign_load_cost. */
141 1, /* vec_store_cost. */
142 1, /* cond_taken_branch_cost. */
143 1, /* cond_not_taken_branch_cost. */
146 /* Processor costs (relative to an add) */
148 struct processor_costs i386_cost = { /* 386 specific costs */
149 COSTS_N_INSNS (1), /* cost of an add instruction */
150 COSTS_N_INSNS (1), /* cost of a lea instruction */
151 COSTS_N_INSNS (3), /* variable shift costs */
152 COSTS_N_INSNS (2), /* constant shift costs */
153 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
154 COSTS_N_INSNS (6), /* HI */
155 COSTS_N_INSNS (6), /* SI */
156 COSTS_N_INSNS (6), /* DI */
157 COSTS_N_INSNS (6)}, /* other */
158 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
159 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
160 COSTS_N_INSNS (23), /* HI */
161 COSTS_N_INSNS (23), /* SI */
162 COSTS_N_INSNS (23), /* DI */
163 COSTS_N_INSNS (23)}, /* other */
164 COSTS_N_INSNS (3), /* cost of movsx */
165 COSTS_N_INSNS (2), /* cost of movzx */
166 15, /* "large" insn */
168 4, /* cost for loading QImode using movzbl */
169 {2, 4, 2}, /* cost of loading integer registers
170 in QImode, HImode and SImode.
171 Relative to reg-reg move (2). */
172 {2, 4, 2}, /* cost of storing integer registers */
173 2, /* cost of reg,reg fld/fst */
174 {8, 8, 8}, /* cost of loading fp registers
175 in SFmode, DFmode and XFmode */
176 {8, 8, 8}, /* cost of storing fp registers
177 in SFmode, DFmode and XFmode */
178 2, /* cost of moving MMX register */
179 {4, 8}, /* cost of loading MMX registers
180 in SImode and DImode */
181 {4, 8}, /* cost of storing MMX registers
182 in SImode and DImode */
183 2, /* cost of moving SSE register */
184 {4, 8, 16}, /* cost of loading SSE registers
185 in SImode, DImode and TImode */
186 {4, 8, 16}, /* cost of storing SSE registers
187 in SImode, DImode and TImode */
188 3, /* MMX or SSE register to integer */
189 0, /* size of l1 cache */
190 0, /* size of l2 cache */
191 0, /* size of prefetch block */
192 0, /* number of parallel prefetches */
194 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
195 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
196 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
197 COSTS_N_INSNS (22), /* cost of FABS instruction. */
198 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
199 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
200 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
201 DUMMY_STRINGOP_ALGS},
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 1, /* scalar_stmt_cost. */
205 1, /* scalar load_cost. */
206 1, /* scalar_store_cost. */
207 1, /* vec_stmt_cost. */
208 1, /* vec_to_scalar_cost. */
209 1, /* scalar_to_vec_cost. */
210 1, /* vec_align_load_cost. */
211 2, /* vec_unalign_load_cost. */
212 1, /* vec_store_cost. */
213 3, /* cond_taken_branch_cost. */
214 1, /* cond_not_taken_branch_cost. */
218 struct processor_costs i486_cost = { /* 486 specific costs */
219 COSTS_N_INSNS (1), /* cost of an add instruction */
220 COSTS_N_INSNS (1), /* cost of a lea instruction */
221 COSTS_N_INSNS (3), /* variable shift costs */
222 COSTS_N_INSNS (2), /* constant shift costs */
223 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
224 COSTS_N_INSNS (12), /* HI */
225 COSTS_N_INSNS (12), /* SI */
226 COSTS_N_INSNS (12), /* DI */
227 COSTS_N_INSNS (12)}, /* other */
228 1, /* cost of multiply per each bit set */
229 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
230 COSTS_N_INSNS (40), /* HI */
231 COSTS_N_INSNS (40), /* SI */
232 COSTS_N_INSNS (40), /* DI */
233 COSTS_N_INSNS (40)}, /* other */
234 COSTS_N_INSNS (3), /* cost of movsx */
235 COSTS_N_INSNS (2), /* cost of movzx */
236 15, /* "large" insn */
238 4, /* cost for loading QImode using movzbl */
239 {2, 4, 2}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 4, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {8, 8, 8}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {8, 8, 8}, /* cost of storing fp registers
247 in SFmode, DFmode and XFmode */
248 2, /* cost of moving MMX register */
249 {4, 8}, /* cost of loading MMX registers
250 in SImode and DImode */
251 {4, 8}, /* cost of storing MMX registers
252 in SImode and DImode */
253 2, /* cost of moving SSE register */
254 {4, 8, 16}, /* cost of loading SSE registers
255 in SImode, DImode and TImode */
256 {4, 8, 16}, /* cost of storing SSE registers
257 in SImode, DImode and TImode */
258 3, /* MMX or SSE register to integer */
259 4, /* size of l1 cache. 486 has 8kB cache
260 shared for code and data, so 4kB is
261 not really precise. */
262 4, /* size of l2 cache */
263 0, /* size of prefetch block */
264 0, /* number of parallel prefetches */
266 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
267 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
268 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
269 COSTS_N_INSNS (3), /* cost of FABS instruction. */
270 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
271 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
272 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
273 DUMMY_STRINGOP_ALGS},
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 1, /* scalar_stmt_cost. */
277 1, /* scalar load_cost. */
278 1, /* scalar_store_cost. */
279 1, /* vec_stmt_cost. */
280 1, /* vec_to_scalar_cost. */
281 1, /* scalar_to_vec_cost. */
282 1, /* vec_align_load_cost. */
283 2, /* vec_unalign_load_cost. */
284 1, /* vec_store_cost. */
285 3, /* cond_taken_branch_cost. */
286 1, /* cond_not_taken_branch_cost. */
290 struct processor_costs pentium_cost = {
291 COSTS_N_INSNS (1), /* cost of an add instruction */
292 COSTS_N_INSNS (1), /* cost of a lea instruction */
293 COSTS_N_INSNS (4), /* variable shift costs */
294 COSTS_N_INSNS (1), /* constant shift costs */
295 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
296 COSTS_N_INSNS (11), /* HI */
297 COSTS_N_INSNS (11), /* SI */
298 COSTS_N_INSNS (11), /* DI */
299 COSTS_N_INSNS (11)}, /* other */
300 0, /* cost of multiply per each bit set */
301 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
302 COSTS_N_INSNS (25), /* HI */
303 COSTS_N_INSNS (25), /* SI */
304 COSTS_N_INSNS (25), /* DI */
305 COSTS_N_INSNS (25)}, /* other */
306 COSTS_N_INSNS (3), /* cost of movsx */
307 COSTS_N_INSNS (2), /* cost of movzx */
308 8, /* "large" insn */
310 6, /* cost for loading QImode using movzbl */
311 {2, 4, 2}, /* cost of loading integer registers
312 in QImode, HImode and SImode.
313 Relative to reg-reg move (2). */
314 {2, 4, 2}, /* cost of storing integer registers */
315 2, /* cost of reg,reg fld/fst */
316 {2, 2, 6}, /* cost of loading fp registers
317 in SFmode, DFmode and XFmode */
318 {4, 4, 6}, /* cost of storing fp registers
319 in SFmode, DFmode and XFmode */
320 8, /* cost of moving MMX register */
321 {8, 8}, /* cost of loading MMX registers
322 in SImode and DImode */
323 {8, 8}, /* cost of storing MMX registers
324 in SImode and DImode */
325 2, /* cost of moving SSE register */
326 {4, 8, 16}, /* cost of loading SSE registers
327 in SImode, DImode and TImode */
328 {4, 8, 16}, /* cost of storing SSE registers
329 in SImode, DImode and TImode */
330 3, /* MMX or SSE register to integer */
331 8, /* size of l1 cache. */
332 8, /* size of l2 cache */
333 0, /* size of prefetch block */
334 0, /* number of parallel prefetches */
336 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
337 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
338 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
339 COSTS_N_INSNS (1), /* cost of FABS instruction. */
340 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
341 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
342 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
343 DUMMY_STRINGOP_ALGS},
344 {{libcall, {{-1, rep_prefix_4_byte}}},
345 DUMMY_STRINGOP_ALGS},
346 1, /* scalar_stmt_cost. */
347 1, /* scalar load_cost. */
348 1, /* scalar_store_cost. */
349 1, /* vec_stmt_cost. */
350 1, /* vec_to_scalar_cost. */
351 1, /* scalar_to_vec_cost. */
352 1, /* vec_align_load_cost. */
353 2, /* vec_unalign_load_cost. */
354 1, /* vec_store_cost. */
355 3, /* cond_taken_branch_cost. */
356 1, /* cond_not_taken_branch_cost. */
360 struct processor_costs pentiumpro_cost = {
361 COSTS_N_INSNS (1), /* cost of an add instruction */
362 COSTS_N_INSNS (1), /* cost of a lea instruction */
363 COSTS_N_INSNS (1), /* variable shift costs */
364 COSTS_N_INSNS (1), /* constant shift costs */
365 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
366 COSTS_N_INSNS (4), /* HI */
367 COSTS_N_INSNS (4), /* SI */
368 COSTS_N_INSNS (4), /* DI */
369 COSTS_N_INSNS (4)}, /* other */
370 0, /* cost of multiply per each bit set */
371 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
372 COSTS_N_INSNS (17), /* HI */
373 COSTS_N_INSNS (17), /* SI */
374 COSTS_N_INSNS (17), /* DI */
375 COSTS_N_INSNS (17)}, /* other */
376 COSTS_N_INSNS (1), /* cost of movsx */
377 COSTS_N_INSNS (1), /* cost of movzx */
378 8, /* "large" insn */
380 2, /* cost for loading QImode using movzbl */
381 {4, 4, 4}, /* cost of loading integer registers
382 in QImode, HImode and SImode.
383 Relative to reg-reg move (2). */
384 {2, 2, 2}, /* cost of storing integer registers */
385 2, /* cost of reg,reg fld/fst */
386 {2, 2, 6}, /* cost of loading fp registers
387 in SFmode, DFmode and XFmode */
388 {4, 4, 6}, /* cost of storing fp registers
389 in SFmode, DFmode and XFmode */
390 2, /* cost of moving MMX register */
391 {2, 2}, /* cost of loading MMX registers
392 in SImode and DImode */
393 {2, 2}, /* cost of storing MMX registers
394 in SImode and DImode */
395 2, /* cost of moving SSE register */
396 {2, 2, 8}, /* cost of loading SSE registers
397 in SImode, DImode and TImode */
398 {2, 2, 8}, /* cost of storing SSE registers
399 in SImode, DImode and TImode */
400 3, /* MMX or SSE register to integer */
401 8, /* size of l1 cache. */
402 256, /* size of l2 cache */
403 32, /* size of prefetch block */
404 6, /* number of parallel prefetches */
406 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
407 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
408 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
409 COSTS_N_INSNS (2), /* cost of FABS instruction. */
410 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
411 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
412 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
413 the alignment). For small blocks inline loop is still a noticeable win, for bigger
414 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
more expensive startup time in CPU, but after 4K the difference is down in the noise.  */
417 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
418 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
419 DUMMY_STRINGOP_ALGS},
420 {{rep_prefix_4_byte, {{1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, libcall}}},
422 DUMMY_STRINGOP_ALGS},
423 1, /* scalar_stmt_cost. */
424 1, /* scalar load_cost. */
425 1, /* scalar_store_cost. */
426 1, /* vec_stmt_cost. */
427 1, /* vec_to_scalar_cost. */
428 1, /* scalar_to_vec_cost. */
429 1, /* vec_align_load_cost. */
430 2, /* vec_unalign_load_cost. */
431 1, /* vec_store_cost. */
432 3, /* cond_taken_branch_cost. */
433 1, /* cond_not_taken_branch_cost. */
437 struct processor_costs geode_cost = {
438 COSTS_N_INSNS (1), /* cost of an add instruction */
439 COSTS_N_INSNS (1), /* cost of a lea instruction */
440 COSTS_N_INSNS (2), /* variable shift costs */
441 COSTS_N_INSNS (1), /* constant shift costs */
442 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
443 COSTS_N_INSNS (4), /* HI */
444 COSTS_N_INSNS (7), /* SI */
445 COSTS_N_INSNS (7), /* DI */
446 COSTS_N_INSNS (7)}, /* other */
447 0, /* cost of multiply per each bit set */
448 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
449 COSTS_N_INSNS (23), /* HI */
450 COSTS_N_INSNS (39), /* SI */
451 COSTS_N_INSNS (39), /* DI */
452 COSTS_N_INSNS (39)}, /* other */
453 COSTS_N_INSNS (1), /* cost of movsx */
454 COSTS_N_INSNS (1), /* cost of movzx */
455 8, /* "large" insn */
457 1, /* cost for loading QImode using movzbl */
458 {1, 1, 1}, /* cost of loading integer registers
459 in QImode, HImode and SImode.
460 Relative to reg-reg move (2). */
461 {1, 1, 1}, /* cost of storing integer registers */
462 1, /* cost of reg,reg fld/fst */
463 {1, 1, 1}, /* cost of loading fp registers
464 in SFmode, DFmode and XFmode */
465 {4, 6, 6}, /* cost of storing fp registers
466 in SFmode, DFmode and XFmode */
468 1, /* cost of moving MMX register */
469 {1, 1}, /* cost of loading MMX registers
470 in SImode and DImode */
471 {1, 1}, /* cost of storing MMX registers
472 in SImode and DImode */
473 1, /* cost of moving SSE register */
474 {1, 1, 1}, /* cost of loading SSE registers
475 in SImode, DImode and TImode */
476 {1, 1, 1}, /* cost of storing SSE registers
477 in SImode, DImode and TImode */
478 1, /* MMX or SSE register to integer */
479 64, /* size of l1 cache. */
480 128, /* size of l2 cache. */
481 32, /* size of prefetch block */
482 1, /* number of parallel prefetches */
484 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
485 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
486 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
487 COSTS_N_INSNS (1), /* cost of FABS instruction. */
488 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
489 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
490 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
491 DUMMY_STRINGOP_ALGS},
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 1, /* scalar_stmt_cost. */
495 1, /* scalar load_cost. */
496 1, /* scalar_store_cost. */
497 1, /* vec_stmt_cost. */
498 1, /* vec_to_scalar_cost. */
499 1, /* scalar_to_vec_cost. */
500 1, /* vec_align_load_cost. */
501 2, /* vec_unalign_load_cost. */
502 1, /* vec_store_cost. */
503 3, /* cond_taken_branch_cost. */
504 1, /* cond_not_taken_branch_cost. */
508 struct processor_costs k6_cost = {
509 COSTS_N_INSNS (1), /* cost of an add instruction */
510 COSTS_N_INSNS (2), /* cost of a lea instruction */
511 COSTS_N_INSNS (1), /* variable shift costs */
512 COSTS_N_INSNS (1), /* constant shift costs */
513 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
514 COSTS_N_INSNS (3), /* HI */
515 COSTS_N_INSNS (3), /* SI */
516 COSTS_N_INSNS (3), /* DI */
517 COSTS_N_INSNS (3)}, /* other */
518 0, /* cost of multiply per each bit set */
519 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
520 COSTS_N_INSNS (18), /* HI */
521 COSTS_N_INSNS (18), /* SI */
522 COSTS_N_INSNS (18), /* DI */
523 COSTS_N_INSNS (18)}, /* other */
524 COSTS_N_INSNS (2), /* cost of movsx */
525 COSTS_N_INSNS (2), /* cost of movzx */
526 8, /* "large" insn */
528 3, /* cost for loading QImode using movzbl */
529 {4, 5, 4}, /* cost of loading integer registers
530 in QImode, HImode and SImode.
531 Relative to reg-reg move (2). */
532 {2, 3, 2}, /* cost of storing integer registers */
533 4, /* cost of reg,reg fld/fst */
534 {6, 6, 6}, /* cost of loading fp registers
535 in SFmode, DFmode and XFmode */
536 {4, 4, 4}, /* cost of storing fp registers
537 in SFmode, DFmode and XFmode */
538 2, /* cost of moving MMX register */
539 {2, 2}, /* cost of loading MMX registers
540 in SImode and DImode */
541 {2, 2}, /* cost of storing MMX registers
542 in SImode and DImode */
543 2, /* cost of moving SSE register */
544 {2, 2, 8}, /* cost of loading SSE registers
545 in SImode, DImode and TImode */
546 {2, 2, 8}, /* cost of storing SSE registers
547 in SImode, DImode and TImode */
548 6, /* MMX or SSE register to integer */
549 32, /* size of l1 cache. */
550 32, /* size of l2 cache. Some models
551 have integrated l2 cache, but
552 optimizing for k6 is not important
553 enough to worry about that. */
554 32, /* size of prefetch block */
555 1, /* number of parallel prefetches */
557 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
558 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
559 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
560 COSTS_N_INSNS (2), /* cost of FABS instruction. */
561 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
562 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
563 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
564 DUMMY_STRINGOP_ALGS},
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 1, /* scalar_stmt_cost. */
568 1, /* scalar load_cost. */
569 1, /* scalar_store_cost. */
570 1, /* vec_stmt_cost. */
571 1, /* vec_to_scalar_cost. */
572 1, /* scalar_to_vec_cost. */
573 1, /* vec_align_load_cost. */
574 2, /* vec_unalign_load_cost. */
575 1, /* vec_store_cost. */
576 3, /* cond_taken_branch_cost. */
577 1, /* cond_not_taken_branch_cost. */
581 struct processor_costs athlon_cost = {
582 COSTS_N_INSNS (1), /* cost of an add instruction */
583 COSTS_N_INSNS (2), /* cost of a lea instruction */
584 COSTS_N_INSNS (1), /* variable shift costs */
585 COSTS_N_INSNS (1), /* constant shift costs */
586 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
587 COSTS_N_INSNS (5), /* HI */
588 COSTS_N_INSNS (5), /* SI */
589 COSTS_N_INSNS (5), /* DI */
590 COSTS_N_INSNS (5)}, /* other */
591 0, /* cost of multiply per each bit set */
592 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
593 COSTS_N_INSNS (26), /* HI */
594 COSTS_N_INSNS (42), /* SI */
595 COSTS_N_INSNS (74), /* DI */
596 COSTS_N_INSNS (74)}, /* other */
597 COSTS_N_INSNS (1), /* cost of movsx */
598 COSTS_N_INSNS (1), /* cost of movzx */
599 8, /* "large" insn */
601 4, /* cost for loading QImode using movzbl */
602 {3, 4, 3}, /* cost of loading integer registers
603 in QImode, HImode and SImode.
604 Relative to reg-reg move (2). */
605 {3, 4, 3}, /* cost of storing integer registers */
606 4, /* cost of reg,reg fld/fst */
607 {4, 4, 12}, /* cost of loading fp registers
608 in SFmode, DFmode and XFmode */
609 {6, 6, 8}, /* cost of storing fp registers
610 in SFmode, DFmode and XFmode */
611 2, /* cost of moving MMX register */
612 {4, 4}, /* cost of loading MMX registers
613 in SImode and DImode */
614 {4, 4}, /* cost of storing MMX registers
615 in SImode and DImode */
616 2, /* cost of moving SSE register */
617 {4, 4, 6}, /* cost of loading SSE registers
618 in SImode, DImode and TImode */
619 {4, 4, 5}, /* cost of storing SSE registers
620 in SImode, DImode and TImode */
621 5, /* MMX or SSE register to integer */
622 64, /* size of l1 cache. */
623 256, /* size of l2 cache. */
624 64, /* size of prefetch block */
625 6, /* number of parallel prefetches */
627 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
633 /* For some reason, Athlon deals better with REP prefix (relative to loops)
634 compared to K8. Alignment becomes important after 8 bytes for memcpy and
635 128 bytes for memset. */
636 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
637 DUMMY_STRINGOP_ALGS},
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 1, /* scalar_stmt_cost. */
641 1, /* scalar load_cost. */
642 1, /* scalar_store_cost. */
643 1, /* vec_stmt_cost. */
644 1, /* vec_to_scalar_cost. */
645 1, /* scalar_to_vec_cost. */
646 1, /* vec_align_load_cost. */
647 2, /* vec_unalign_load_cost. */
648 1, /* vec_store_cost. */
649 3, /* cond_taken_branch_cost. */
650 1, /* cond_not_taken_branch_cost. */
654 struct processor_costs k8_cost = {
655 COSTS_N_INSNS (1), /* cost of an add instruction */
656 COSTS_N_INSNS (2), /* cost of a lea instruction */
657 COSTS_N_INSNS (1), /* variable shift costs */
658 COSTS_N_INSNS (1), /* constant shift costs */
659 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
660 COSTS_N_INSNS (4), /* HI */
661 COSTS_N_INSNS (3), /* SI */
662 COSTS_N_INSNS (4), /* DI */
663 COSTS_N_INSNS (5)}, /* other */
664 0, /* cost of multiply per each bit set */
665 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
666 COSTS_N_INSNS (26), /* HI */
667 COSTS_N_INSNS (42), /* SI */
668 COSTS_N_INSNS (74), /* DI */
669 COSTS_N_INSNS (74)}, /* other */
670 COSTS_N_INSNS (1), /* cost of movsx */
671 COSTS_N_INSNS (1), /* cost of movzx */
672 8, /* "large" insn */
674 4, /* cost for loading QImode using movzbl */
675 {3, 4, 3}, /* cost of loading integer registers
676 in QImode, HImode and SImode.
677 Relative to reg-reg move (2). */
678 {3, 4, 3}, /* cost of storing integer registers */
679 4, /* cost of reg,reg fld/fst */
680 {4, 4, 12}, /* cost of loading fp registers
681 in SFmode, DFmode and XFmode */
682 {6, 6, 8}, /* cost of storing fp registers
683 in SFmode, DFmode and XFmode */
684 2, /* cost of moving MMX register */
685 {3, 3}, /* cost of loading MMX registers
686 in SImode and DImode */
687 {4, 4}, /* cost of storing MMX registers
688 in SImode and DImode */
689 2, /* cost of moving SSE register */
690 {4, 3, 6}, /* cost of loading SSE registers
691 in SImode, DImode and TImode */
692 {4, 4, 5}, /* cost of storing SSE registers
693 in SImode, DImode and TImode */
694 5, /* MMX or SSE register to integer */
695 64, /* size of l1 cache. */
696 512, /* size of l2 cache. */
697 64, /* size of prefetch block */
698 /* New AMD processors never drop prefetches; if they cannot be performed
699 immediately, they are queued. We set number of simultaneous prefetches
700 to a large constant to reflect this (it probably is not a good idea not
to limit number of prefetches at all, as their execution also takes some time).  */
703 100, /* number of parallel prefetches */
705 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (2), /* cost of FABS instruction. */
709 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
711 /* K8 has optimized REP instruction for medium sized blocks, but for very small
712 blocks it is better to use loop. For large blocks, libcall can do
713 nontemporary accesses and beat inline considerably. */
714 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
715 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
716 {{libcall, {{8, loop}, {24, unrolled_loop},
717 {2048, rep_prefix_4_byte}, {-1, libcall}}},
718 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 4, /* scalar_stmt_cost. */
720 2, /* scalar load_cost. */
721 2, /* scalar_store_cost. */
722 5, /* vec_stmt_cost. */
723 0, /* vec_to_scalar_cost. */
724 2, /* scalar_to_vec_cost. */
725 2, /* vec_align_load_cost. */
726 3, /* vec_unalign_load_cost. */
727 3, /* vec_store_cost. */
728 3, /* cond_taken_branch_cost. */
729 2, /* cond_not_taken_branch_cost. */
732 struct processor_costs amdfam10_cost = {
733 COSTS_N_INSNS (1), /* cost of an add instruction */
734 COSTS_N_INSNS (2), /* cost of a lea instruction */
735 COSTS_N_INSNS (1), /* variable shift costs */
736 COSTS_N_INSNS (1), /* constant shift costs */
737 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
738 COSTS_N_INSNS (4), /* HI */
739 COSTS_N_INSNS (3), /* SI */
740 COSTS_N_INSNS (4), /* DI */
741 COSTS_N_INSNS (5)}, /* other */
742 0, /* cost of multiply per each bit set */
743 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
744 COSTS_N_INSNS (35), /* HI */
745 COSTS_N_INSNS (51), /* SI */
746 COSTS_N_INSNS (83), /* DI */
747 COSTS_N_INSNS (83)}, /* other */
748 COSTS_N_INSNS (1), /* cost of movsx */
749 COSTS_N_INSNS (1), /* cost of movzx */
750 8, /* "large" insn */
752 4, /* cost for loading QImode using movzbl */
753 {3, 4, 3}, /* cost of loading integer registers
754 in QImode, HImode and SImode.
755 Relative to reg-reg move (2). */
756 {3, 4, 3}, /* cost of storing integer registers */
757 4, /* cost of reg,reg fld/fst */
758 {4, 4, 12}, /* cost of loading fp registers
759 in SFmode, DFmode and XFmode */
760 {6, 6, 8}, /* cost of storing fp registers
761 in SFmode, DFmode and XFmode */
762 2, /* cost of moving MMX register */
763 {3, 3}, /* cost of loading MMX registers
764 in SImode and DImode */
765 {4, 4}, /* cost of storing MMX registers
766 in SImode and DImode */
767 2, /* cost of moving SSE register */
768 {4, 4, 3}, /* cost of loading SSE registers
769 in SImode, DImode and TImode */
770 {4, 4, 5}, /* cost of storing SSE registers
771 in SImode, DImode and TImode */
772 3, /* MMX or SSE register to integer */
/* MOVD reg64, xmmreg Double FSTORE 4
   MOVD reg32, xmmreg Double FSTORE 4
   MOVD reg64, xmmreg Double FADD 3
   MOVD reg32, xmmreg Double FADD 3 */
781 64, /* size of l1 cache. */
782 512, /* size of l2 cache. */
783 64, /* size of prefetch block */
784 /* New AMD processors never drop prefetches; if they cannot be performed
785 immediately, they are queued. We set number of simultaneous prefetches
786 to a large constant to reflect this (it probably is not a good idea not
to limit number of prefetches at all, as their execution also takes some time).  */
789 100, /* number of parallel prefetches */
791 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
792 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
793 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
794 COSTS_N_INSNS (2), /* cost of FABS instruction. */
795 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
796 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
798 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
799 very small blocks it is better to use loop. For large blocks, libcall can
800 do nontemporary accesses and beat inline considerably. */
801 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
802 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
803 {{libcall, {{8, loop}, {24, unrolled_loop},
804 {2048, rep_prefix_4_byte}, {-1, libcall}}},
805 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 4, /* scalar_stmt_cost. */
807 2, /* scalar load_cost. */
808 2, /* scalar_store_cost. */
809 6, /* vec_stmt_cost. */
810 0, /* vec_to_scalar_cost. */
811 2, /* scalar_to_vec_cost. */
812 2, /* vec_align_load_cost. */
813 2, /* vec_unalign_load_cost. */
814 2, /* vec_store_cost. */
815 2, /* cond_taken_branch_cost. */
816 1, /* cond_not_taken_branch_cost. */
820 struct processor_costs pentium4_cost = {
821 COSTS_N_INSNS (1), /* cost of an add instruction */
822 COSTS_N_INSNS (3), /* cost of a lea instruction */
823 COSTS_N_INSNS (4), /* variable shift costs */
824 COSTS_N_INSNS (4), /* constant shift costs */
825 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
826 COSTS_N_INSNS (15), /* HI */
827 COSTS_N_INSNS (15), /* SI */
828 COSTS_N_INSNS (15), /* DI */
829 COSTS_N_INSNS (15)}, /* other */
830 0, /* cost of multiply per each bit set */
831 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
832 COSTS_N_INSNS (56), /* HI */
833 COSTS_N_INSNS (56), /* SI */
834 COSTS_N_INSNS (56), /* DI */
835 COSTS_N_INSNS (56)}, /* other */
836 COSTS_N_INSNS (1), /* cost of movsx */
837 COSTS_N_INSNS (1), /* cost of movzx */
838 16, /* "large" insn */
840 2, /* cost for loading QImode using movzbl */
841 {4, 5, 4}, /* cost of loading integer registers
842 in QImode, HImode and SImode.
843 Relative to reg-reg move (2). */
844 {2, 3, 2}, /* cost of storing integer registers */
845 2, /* cost of reg,reg fld/fst */
846 {2, 2, 6}, /* cost of loading fp registers
847 in SFmode, DFmode and XFmode */
848 {4, 4, 6}, /* cost of storing fp registers
849 in SFmode, DFmode and XFmode */
850 2, /* cost of moving MMX register */
851 {2, 2}, /* cost of loading MMX registers
852 in SImode and DImode */
853 {2, 2}, /* cost of storing MMX registers
854 in SImode and DImode */
855 12, /* cost of moving SSE register */
856 {12, 12, 12}, /* cost of loading SSE registers
857 in SImode, DImode and TImode */
858 {2, 2, 8}, /* cost of storing SSE registers
859 in SImode, DImode and TImode */
860 10, /* MMX or SSE register to integer */
861 8, /* size of l1 cache. */
862 256, /* size of l2 cache. */
863 64, /* size of prefetch block */
864 6, /* number of parallel prefetches */
866 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
867 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
868 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
869 COSTS_N_INSNS (2), /* cost of FABS instruction. */
870 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
871 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
872 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
873 DUMMY_STRINGOP_ALGS},
874 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
876 DUMMY_STRINGOP_ALGS},
877 1, /* scalar_stmt_cost. */
878 1, /* scalar load_cost. */
879 1, /* scalar_store_cost. */
880 1, /* vec_stmt_cost. */
881 1, /* vec_to_scalar_cost. */
882 1, /* scalar_to_vec_cost. */
883 1, /* vec_align_load_cost. */
884 2, /* vec_unalign_load_cost. */
885 1, /* vec_store_cost. */
886 3, /* cond_taken_branch_cost. */
887 1, /* cond_not_taken_branch_cost. */
891 struct processor_costs nocona_cost = {
892 COSTS_N_INSNS (1), /* cost of an add instruction */
893 COSTS_N_INSNS (1), /* cost of a lea instruction */
894 COSTS_N_INSNS (1), /* variable shift costs */
895 COSTS_N_INSNS (1), /* constant shift costs */
896 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
897 COSTS_N_INSNS (10), /* HI */
898 COSTS_N_INSNS (10), /* SI */
899 COSTS_N_INSNS (10), /* DI */
900 COSTS_N_INSNS (10)}, /* other */
901 0, /* cost of multiply per each bit set */
902 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
903 COSTS_N_INSNS (66), /* HI */
904 COSTS_N_INSNS (66), /* SI */
905 COSTS_N_INSNS (66), /* DI */
906 COSTS_N_INSNS (66)}, /* other */
907 COSTS_N_INSNS (1), /* cost of movsx */
908 COSTS_N_INSNS (1), /* cost of movzx */
909 16, /* "large" insn */
911 4, /* cost for loading QImode using movzbl */
912 {4, 4, 4}, /* cost of loading integer registers
913 in QImode, HImode and SImode.
914 Relative to reg-reg move (2). */
915 {4, 4, 4}, /* cost of storing integer registers */
916 3, /* cost of reg,reg fld/fst */
917 {12, 12, 12}, /* cost of loading fp registers
918 in SFmode, DFmode and XFmode */
919 {4, 4, 4}, /* cost of storing fp registers
920 in SFmode, DFmode and XFmode */
921 6, /* cost of moving MMX register */
922 {12, 12}, /* cost of loading MMX registers
923 in SImode and DImode */
924 {12, 12}, /* cost of storing MMX registers
925 in SImode and DImode */
926 6, /* cost of moving SSE register */
927 {12, 12, 12}, /* cost of loading SSE registers
928 in SImode, DImode and TImode */
929 {12, 12, 12}, /* cost of storing SSE registers
930 in SImode, DImode and TImode */
931 8, /* MMX or SSE register to integer */
932 8, /* size of l1 cache. */
933 1024, /* size of l2 cache. */
934 128, /* size of prefetch block */
935 8, /* number of parallel prefetches */
937 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
938 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
939 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
940 COSTS_N_INSNS (3), /* cost of FABS instruction. */
941 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
942 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
943 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
944 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
945 {100000, unrolled_loop}, {-1, libcall}}}},
946 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
948 {libcall, {{24, loop}, {64, unrolled_loop},
949 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
950 1, /* scalar_stmt_cost. */
951 1, /* scalar load_cost. */
952 1, /* scalar_store_cost. */
953 1, /* vec_stmt_cost. */
954 1, /* vec_to_scalar_cost. */
955 1, /* scalar_to_vec_cost. */
956 1, /* vec_align_load_cost. */
957 2, /* vec_unalign_load_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
960 1, /* cond_not_taken_branch_cost. */
964 struct processor_costs core2_cost = {
965 COSTS_N_INSNS (1), /* cost of an add instruction */
966 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
967 COSTS_N_INSNS (1), /* variable shift costs */
968 COSTS_N_INSNS (1), /* constant shift costs */
969 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
970 COSTS_N_INSNS (3), /* HI */
971 COSTS_N_INSNS (3), /* SI */
972 COSTS_N_INSNS (3), /* DI */
973 COSTS_N_INSNS (3)}, /* other */
974 0, /* cost of multiply per each bit set */
975 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
976 COSTS_N_INSNS (22), /* HI */
977 COSTS_N_INSNS (22), /* SI */
978 COSTS_N_INSNS (22), /* DI */
979 COSTS_N_INSNS (22)}, /* other */
980 COSTS_N_INSNS (1), /* cost of movsx */
981 COSTS_N_INSNS (1), /* cost of movzx */
982 8, /* "large" insn */
984 2, /* cost for loading QImode using movzbl */
985 {6, 6, 6}, /* cost of loading integer registers
986 in QImode, HImode and SImode.
987 Relative to reg-reg move (2). */
988 {4, 4, 4}, /* cost of storing integer registers */
989 2, /* cost of reg,reg fld/fst */
990 {6, 6, 6}, /* cost of loading fp registers
991 in SFmode, DFmode and XFmode */
992 {4, 4, 4}, /* cost of loading integer registers */
993 2, /* cost of moving MMX register */
994 {6, 6}, /* cost of loading MMX registers
995 in SImode and DImode */
996 {4, 4}, /* cost of storing MMX registers
997 in SImode and DImode */
998 2, /* cost of moving SSE register */
999 {6, 6, 6}, /* cost of loading SSE registers
1000 in SImode, DImode and TImode */
1001 {4, 4, 4}, /* cost of storing SSE registers
1002 in SImode, DImode and TImode */
1003 2, /* MMX or SSE register to integer */
1004 32, /* size of l1 cache. */
1005 2048, /* size of l2 cache. */
1006 128, /* size of prefetch block */
1007 8, /* number of parallel prefetches */
1008 3, /* Branch cost */
1009 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1010 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1011 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1012 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1013 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1014 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1015 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1016 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1017 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1018 {{libcall, {{8, loop}, {15, unrolled_loop},
1019 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1020 {libcall, {{24, loop}, {32, unrolled_loop},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 1, /* scalar_stmt_cost. */
1023 1, /* scalar load_cost. */
1024 1, /* scalar_store_cost. */
1025 1, /* vec_stmt_cost. */
1026 1, /* vec_to_scalar_cost. */
1027 1, /* scalar_to_vec_cost. */
1028 1, /* vec_align_load_cost. */
1029 2, /* vec_unalign_load_cost. */
1030 1, /* vec_store_cost. */
1031 3, /* cond_taken_branch_cost. */
1032 1, /* cond_not_taken_branch_cost. */
1035 /* Generic64 should produce code tuned for Nocona and K8. */
1037 struct processor_costs generic64_cost = {
1038 COSTS_N_INSNS (1), /* cost of an add instruction */
1039 /* On all chips taken into consideration lea is 2 cycles and more. With
1040 this cost however our current implementation of synth_mult results in
1041 use of unnecessary temporary registers causing regression on several
1042 SPECfp benchmarks. */
1043 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1044 COSTS_N_INSNS (1), /* variable shift costs */
1045 COSTS_N_INSNS (1), /* constant shift costs */
1046 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1047 COSTS_N_INSNS (4), /* HI */
1048 COSTS_N_INSNS (3), /* SI */
1049 COSTS_N_INSNS (4), /* DI */
1050 COSTS_N_INSNS (2)}, /* other */
1051 0, /* cost of multiply per each bit set */
1052 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1053 COSTS_N_INSNS (26), /* HI */
1054 COSTS_N_INSNS (42), /* SI */
1055 COSTS_N_INSNS (74), /* DI */
1056 COSTS_N_INSNS (74)}, /* other */
1057 COSTS_N_INSNS (1), /* cost of movsx */
1058 COSTS_N_INSNS (1), /* cost of movzx */
1059 8, /* "large" insn */
1060 17, /* MOVE_RATIO */
1061 4, /* cost for loading QImode using movzbl */
1062 {4, 4, 4}, /* cost of loading integer registers
1063 in QImode, HImode and SImode.
1064 Relative to reg-reg move (2). */
1065 {4, 4, 4}, /* cost of storing integer registers */
1066 4, /* cost of reg,reg fld/fst */
1067 {12, 12, 12}, /* cost of loading fp registers
1068 in SFmode, DFmode and XFmode */
1069 {6, 6, 8}, /* cost of storing fp registers
1070 in SFmode, DFmode and XFmode */
1071 2, /* cost of moving MMX register */
1072 {8, 8}, /* cost of loading MMX registers
1073 in SImode and DImode */
1074 {8, 8}, /* cost of storing MMX registers
1075 in SImode and DImode */
1076 2, /* cost of moving SSE register */
1077 {8, 8, 8}, /* cost of loading SSE registers
1078 in SImode, DImode and TImode */
1079 {8, 8, 8}, /* cost of storing SSE registers
1080 in SImode, DImode and TImode */
1081 5, /* MMX or SSE register to integer */
1082 32, /* size of l1 cache. */
1083 512, /* size of l2 cache. */
1084 64, /* size of prefetch block */
1085 6, /* number of parallel prefetches */
1086 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1087 is increased to perhaps more appropriate value of 5. */
1088 3, /* Branch cost */
1089 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1090 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1091 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1092 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1093 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1094 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1095 {DUMMY_STRINGOP_ALGS,
1096 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 1, /* scalar_stmt_cost. */
1100 1, /* scalar load_cost. */
1101 1, /* scalar_store_cost. */
1102 1, /* vec_stmt_cost. */
1103 1, /* vec_to_scalar_cost. */
1104 1, /* scalar_to_vec_cost. */
1105 1, /* vec_align_load_cost. */
1106 2, /* vec_unalign_load_cost. */
1107 1, /* vec_store_cost. */
1108 3, /* cond_taken_branch_cost. */
1109 1, /* cond_not_taken_branch_cost. */
1112 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1114 struct processor_costs generic32_cost = {
1115 COSTS_N_INSNS (1), /* cost of an add instruction */
1116 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1117 COSTS_N_INSNS (1), /* variable shift costs */
1118 COSTS_N_INSNS (1), /* constant shift costs */
1119 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1120 COSTS_N_INSNS (4), /* HI */
1121 COSTS_N_INSNS (3), /* SI */
1122 COSTS_N_INSNS (4), /* DI */
1123 COSTS_N_INSNS (2)}, /* other */
1124 0, /* cost of multiply per each bit set */
1125 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1126 COSTS_N_INSNS (26), /* HI */
1127 COSTS_N_INSNS (42), /* SI */
1128 COSTS_N_INSNS (74), /* DI */
1129 COSTS_N_INSNS (74)}, /* other */
1130 COSTS_N_INSNS (1), /* cost of movsx */
1131 COSTS_N_INSNS (1), /* cost of movzx */
1132 8, /* "large" insn */
1133 17, /* MOVE_RATIO */
1134 4, /* cost for loading QImode using movzbl */
1135 {4, 4, 4}, /* cost of loading integer registers
1136 in QImode, HImode and SImode.
1137 Relative to reg-reg move (2). */
1138 {4, 4, 4}, /* cost of storing integer registers */
1139 4, /* cost of reg,reg fld/fst */
1140 {12, 12, 12}, /* cost of loading fp registers
1141 in SFmode, DFmode and XFmode */
1142 {6, 6, 8}, /* cost of storing fp registers
1143 in SFmode, DFmode and XFmode */
1144 2, /* cost of moving MMX register */
1145 {8, 8}, /* cost of loading MMX registers
1146 in SImode and DImode */
1147 {8, 8}, /* cost of storing MMX registers
1148 in SImode and DImode */
1149 2, /* cost of moving SSE register */
1150 {8, 8, 8}, /* cost of loading SSE registers
1151 in SImode, DImode and TImode */
1152 {8, 8, 8}, /* cost of storing SSE registers
1153 in SImode, DImode and TImode */
1154 5, /* MMX or SSE register to integer */
1155 32, /* size of l1 cache. */
1156 256, /* size of l2 cache. */
1157 64, /* size of prefetch block */
1158 6, /* number of parallel prefetches */
1159 3, /* Branch cost */
1160 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1161 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1162 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1163 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1164 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1165 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1166 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1167 DUMMY_STRINGOP_ALGS},
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 1, /* scalar_stmt_cost. */
1171 1, /* scalar load_cost. */
1172 1, /* scalar_store_cost. */
1173 1, /* vec_stmt_cost. */
1174 1, /* vec_to_scalar_cost. */
1175 1, /* scalar_to_vec_cost. */
1176 1, /* vec_align_load_cost. */
1177 2, /* vec_unalign_load_cost. */
1178 1, /* vec_store_cost. */
1179 3, /* cond_taken_branch_cost. */
1180 1, /* cond_not_taken_branch_cost. */
1183 const struct processor_costs *ix86_cost = &pentium_cost;
1185 /* Processor feature/optimization bitmasks. */
1186 #define m_386 (1<<PROCESSOR_I386)
1187 #define m_486 (1<<PROCESSOR_I486)
1188 #define m_PENT (1<<PROCESSOR_PENTIUM)
1189 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1190 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1191 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1192 #define m_CORE2 (1<<PROCESSOR_CORE2)
1194 #define m_GEODE (1<<PROCESSOR_GEODE)
1195 #define m_K6 (1<<PROCESSOR_K6)
1196 #define m_K6_GEODE (m_K6 | m_GEODE)
1197 #define m_K8 (1<<PROCESSOR_K8)
1198 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1199 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1200 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1201 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1203 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1204 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1206 /* Generic instruction choice should be common subset of supported CPUs
1207 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1208 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1210 /* Feature tests against the various tunings. */
1211 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1212 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1213 negatively, so enabling for Generic64 seems like good code size
1214 tradeoff. We can't enable it for 32bit generic because it does not
1215 work well with PPro base chips. */
1216 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1218 /* X86_TUNE_PUSH_MEMORY */
1219 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1220 | m_NOCONA | m_CORE2 | m_GENERIC,
1222 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1225 /* X86_TUNE_USE_BIT_TEST */
1228 /* X86_TUNE_UNROLL_STRLEN */
1229 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1231 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1232 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1234 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1235 on simulation result. But after P4 was made, no performance benefit
1236 was observed with branch hints. It also increases the code size.
1237 As a result, icc never generates branch hints. */
1240 /* X86_TUNE_DOUBLE_WITH_ADD */
1243 /* X86_TUNE_USE_SAHF */
1244 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1245 | m_NOCONA | m_CORE2 | m_GENERIC,
1247 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1248 partial dependencies. */
1249 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1250 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1252 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1253 register stalls on Generic32 compilation setting as well. However
1254 in current implementation the partial register stalls are not eliminated
1255 very well - they can be introduced via subregs synthesized by combine
1256 and can happen in caller/callee saving sequences. Because this option
1257 pays back little on PPro based chips and is in conflict with partial reg
1258 dependencies used by Athlon/P4 based chips, it is better to leave it off
1259 for generic32 for now. */
1262 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1263 m_CORE2 | m_GENERIC,
1265 /* X86_TUNE_USE_HIMODE_FIOP */
1266 m_386 | m_486 | m_K6_GEODE,
1268 /* X86_TUNE_USE_SIMODE_FIOP */
1269 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1271 /* X86_TUNE_USE_MOV0 */
1274 /* X86_TUNE_USE_CLTD */
1275 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1277 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1280 /* X86_TUNE_SPLIT_LONG_MOVES */
1283 /* X86_TUNE_READ_MODIFY_WRITE */
1286 /* X86_TUNE_READ_MODIFY */
1289 /* X86_TUNE_PROMOTE_QIMODE */
1290 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1291 | m_GENERIC /* | m_PENT4 ? */,
1293 /* X86_TUNE_FAST_PREFIX */
1294 ~(m_PENT | m_486 | m_386),
1296 /* X86_TUNE_SINGLE_STRINGOP */
1297 m_386 | m_PENT4 | m_NOCONA,
1299 /* X86_TUNE_QIMODE_MATH */
1302 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1303 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1304 might be considered for Generic32 if our scheme for avoiding partial
1305 stalls was more effective. */
1308 /* X86_TUNE_PROMOTE_QI_REGS */
1311 /* X86_TUNE_PROMOTE_HI_REGS */
1314 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1315 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1317 /* X86_TUNE_ADD_ESP_8 */
1318 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1319 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1321 /* X86_TUNE_SUB_ESP_4 */
1322 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1324 /* X86_TUNE_SUB_ESP_8 */
1325 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1326 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1328 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1329 for DFmode copies */
1330 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1331 | m_GENERIC | m_GEODE),
1333 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1334 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1336 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1337 conflict here in between PPro/Pentium4 based chips that thread 128bit
1338 SSE registers as single units versus K8 based chips that divide SSE
1339 registers to two 64bit halves. This knob promotes all store destinations
1340 to be 128bit to allow register renaming on 128bit SSE units, but usually
1341 results in one extra microop on 64bit SSE units. Experimental results
1342 shows that disabling this option on P4 brings over 20% SPECfp regression,
1343 while enabling it on K8 brings roughly 2.4% regression that can be partly
1344 masked by careful scheduling of moves. */
1345 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1347 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1350 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1351 are resolved on SSE register parts instead of whole registers, so we may
1352 maintain just lower part of scalar values in proper format leaving the
1353 upper part undefined. */
1356 /* X86_TUNE_SSE_TYPELESS_STORES */
1359 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1360 m_PPRO | m_PENT4 | m_NOCONA,
1362 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1363 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1365 /* X86_TUNE_PROLOGUE_USING_MOVE */
1366 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1368 /* X86_TUNE_EPILOGUE_USING_MOVE */
1369 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1371 /* X86_TUNE_SHIFT1 */
1374 /* X86_TUNE_USE_FFREEP */
1377 /* X86_TUNE_INTER_UNIT_MOVES */
1378 ~(m_AMD_MULTIPLE | m_GENERIC),
1380 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1383 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1384 than 4 branch instructions in the 16 byte window. */
1385 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1387 /* X86_TUNE_SCHEDULE */
1388 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1390 /* X86_TUNE_USE_BT */
1393 /* X86_TUNE_USE_INCDEC */
1394 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1396 /* X86_TUNE_PAD_RETURNS */
1397 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1399 /* X86_TUNE_EXT_80387_CONSTANTS */
1400 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SHORTEN_X87_SSE */
1405 /* X86_TUNE_AVOID_VECTOR_DECODE */
1408 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1409 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1412 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1413 vector path on AMD machines. */
1414 m_K8 | m_GENERIC64 | m_AMDFAM10,
1416 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1418 m_K8 | m_GENERIC64 | m_AMDFAM10,
1420 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1424 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1425 but one byte longer. */
1428 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1429 operand that cannot be represented using a modRM byte. The XOR
1430 replacement is long decoded, so this split helps here as well. */
1433 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1434 from integer to FP. */
1438 /* Feature tests against the various architecture variations. */
1439 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1440 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1441 ~(m_386 | m_486 | m_PENT | m_K6),
1443 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1446 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1449 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1452 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1456 static const unsigned int x86_accumulate_outgoing_args
1457 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1459 static const unsigned int x86_arch_always_fancy_math_387
1460 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1461 | m_NOCONA | m_CORE2 | m_GENERIC;
1463 static enum stringop_alg stringop_alg = no_stringop;
1465 /* In case the average insn count for single function invocation is
1466 lower than this constant, emit fast (but longer) prologue and
1468 #define FAST_PROLOGUE_INSN_COUNT 20
1470 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1471 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1472 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1473 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1475 /* Array of the smallest class containing reg number REGNO, indexed by
1476 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1478 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1480 /* ax, dx, cx, bx */
1481 AREG, DREG, CREG, BREG,
1482 /* si, di, bp, sp */
1483 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1485 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1486 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1489 /* flags, fpsr, fpcr, frame */
1490 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1492 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1495 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1498 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1500 /* SSE REX registers */
1501 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1505 /* The "default" register map used in 32bit mode. */
1507 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1509 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1510 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1511 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1512 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1513 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1515 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1518 static int const x86_64_int_parameter_registers[6] =
1520 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1521 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1524 static int const x86_64_ms_abi_int_parameter_registers[4] =
1526 2 /*RCX*/, 1 /*RDX*/,
1527 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1530 static int const x86_64_int_return_registers[4] =
1532 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1535 /* The "default" register map used in 64bit mode. */
1536 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1538 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1539 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1540 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1541 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1542 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1543 8,9,10,11,12,13,14,15, /* extended integer registers */
1544 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1547 /* Define the register numbers to be used in Dwarf debugging information.
1548 The SVR4 reference port C compiler uses the following register numbers
1549 in its Dwarf output code:
1550 0 for %eax (gcc regno = 0)
1551 1 for %ecx (gcc regno = 2)
1552 2 for %edx (gcc regno = 1)
1553 3 for %ebx (gcc regno = 3)
1554 4 for %esp (gcc regno = 7)
1555 5 for %ebp (gcc regno = 6)
1556 6 for %esi (gcc regno = 4)
1557 7 for %edi (gcc regno = 5)
1558 The following three DWARF register numbers are never generated by
1559 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1560 believes these numbers have these meanings.
1561 8 for %eip (no gcc equivalent)
1562 9 for %eflags (gcc regno = 17)
1563 10 for %trapno (no gcc equivalent)
1564 It is not at all clear how we should number the FP stack registers
1565 for the x86 architecture. If the version of SDB on x86/svr4 were
1566 a bit less brain dead with respect to floating-point then we would
1567 have a precedent to follow with respect to DWARF register numbers
1568 for x86 FP registers, but the SDB on x86/svr4 is so completely
1569 broken with respect to FP registers that it is hardly worth thinking
1570 of it as something to strive for compatibility with.
1571 The version of x86/svr4 SDB I have at the moment does (partially)
1572 seem to believe that DWARF register number 11 is associated with
1573 the x86 register %st(0), but that's about all. Higher DWARF
1574 register numbers don't seem to be associated with anything in
1575 particular, and even for DWARF regno 11, SDB only seems to under-
1576 stand that it should say that a variable lives in %st(0) (when
1577 asked via an `=' command) if we said it was in DWARF regno 11,
1578 but SDB still prints garbage when asked for the value of the
1579 variable in question (via a `/' command).
1580 (Also note that the labels SDB prints for various FP stack regs
1581 when doing an `x' command are all wrong.)
1582 Note that these problems generally don't affect the native SVR4
1583 C compiler because it doesn't allow the use of -O with -g and
1584 because when it is *not* optimizing, it allocates a memory
1585 location for each floating-point variable, and the memory
1586 location is what gets described in the DWARF AT_location
1587 attribute for the variable in question.
1588 Regardless of the severe mental illness of the x86/svr4 SDB, we
1589 do something sensible here and we use the following DWARF
1590 register numbers. Note that these are all stack-top-relative
1592 11 for %st(0) (gcc regno = 8)
1593 12 for %st(1) (gcc regno = 9)
1594 13 for %st(2) (gcc regno = 10)
1595 14 for %st(3) (gcc regno = 11)
1596 15 for %st(4) (gcc regno = 12)
1597 16 for %st(5) (gcc regno = 13)
1598 17 for %st(6) (gcc regno = 14)
1599 18 for %st(7) (gcc regno = 15)
1601 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1603 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1604 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1605 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1606 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1607 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1609 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1612 /* Test and compare insns in i386.md store the information needed to
1613 generate branch and scc insns here. */
1615 rtx ix86_compare_op0 = NULL_RTX;
1616 rtx ix86_compare_op1 = NULL_RTX;
1617 rtx ix86_compare_emitted = NULL_RTX;
1619 /* Size of the register save area. */
1620 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1622 /* Define the structure for the machine field in struct function. */
1624 struct stack_local_entry GTY(())
1626 unsigned short mode;
1629 struct stack_local_entry *next;
1632 /* Structure describing stack frame layout.
1633 Stack grows downward:
1639 saved frame pointer if frame_pointer_needed
1640 <- HARD_FRAME_POINTER
1645 [va_arg registers] (
1646 > to_allocate <- FRAME_POINTER
1656 HOST_WIDE_INT frame;
1658 int outgoing_arguments_size;
1661 HOST_WIDE_INT to_allocate;
1662 /* The offsets relative to ARG_POINTER. */
1663 HOST_WIDE_INT frame_pointer_offset;
1664 HOST_WIDE_INT hard_frame_pointer_offset;
1665 HOST_WIDE_INT stack_pointer_offset;
1667 /* When save_regs_using_mov is set, emit prologue using
1668 move instead of push instructions. */
1669 bool save_regs_using_mov;
1672 /* Code model option. */
1673 enum cmodel ix86_cmodel;
1675 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1677 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1679 /* Which unit we are generating floating point math for. */
1680 enum fpmath_unit ix86_fpmath;
1682 /* Which cpu are we scheduling for. */
1683 enum processor_type ix86_tune;
1685 /* Which instruction set architecture to use. */
1686 enum processor_type ix86_arch;
1688 /* true if sse prefetch instruction is not NOOP. */
1689 int x86_prefetch_sse;
1691 /* ix86_regparm_string as a number */
1692 static int ix86_regparm;
1694 /* -mstackrealign option */
1695 extern int ix86_force_align_arg_pointer;
1696 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1698 /* Preferred alignment for stack boundary in bits. */
1699 unsigned int ix86_preferred_stack_boundary;
1701 /* Values 1-5: see jump.c */
1702 int ix86_branch_cost;
1704 /* Variables which are this size or smaller are put in the data/bss
1705 or ldata/lbss sections. */
1707 int ix86_section_threshold = 65536;
1709 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1710 char internal_label_prefix[16];
1711 int internal_label_prefix_len;
1713 /* Fence to use after loop using movnt. */
1716 /* Register class used for passing given 64bit part of the argument.
1717 These represent classes as documented by the PS ABI, with the exception
1718 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1719 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1721 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1722 whenever possible (upper half does contain padding). */
1723 enum x86_64_reg_class
1726 X86_64_INTEGER_CLASS,
1727 X86_64_INTEGERSI_CLASS,
1734 X86_64_COMPLEX_X87_CLASS,
1737 static const char * const x86_64_reg_class_name[] =
1739 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1740 "sseup", "x87", "x87up", "cplx87", "no"
1743 #define MAX_CLASSES 4
1745 /* Table of constants used by fldpi, fldln2, etc.... */
1746 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1747 static bool ext_80387_constants_init = 0;
1750 static struct machine_function * ix86_init_machine_status (void);
1751 static rtx ix86_function_value (const_tree, const_tree, bool);
1752 static int ix86_function_regparm (const_tree, const_tree);
1753 static void ix86_compute_frame_layout (struct ix86_frame *);
1754 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1758 /* The svr4 ABI for the i386 says that records and unions are returned
1760 #ifndef DEFAULT_PCC_STRUCT_RETURN
1761 #define DEFAULT_PCC_STRUCT_RETURN 1
1764 /* Bit flags that specify the ISA we are compiling for. */
1765 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1767 /* A mask of ix86_isa_flags that includes bit X if X
1768 was set or cleared on the command line. */
1769 static int ix86_isa_flags_explicit;
1771 /* Define a set of ISAs which aren't available for a given ISA. MMX
1772 and SSE ISAs are handled separately. */
1774 #define OPTION_MASK_ISA_MMX_UNSET \
1775 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1776 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1778 #define OPTION_MASK_ISA_SSE_UNSET \
1779 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1780 #define OPTION_MASK_ISA_SSE2_UNSET \
1781 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1782 #define OPTION_MASK_ISA_SSE3_UNSET \
1783 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1784 #define OPTION_MASK_ISA_SSSE3_UNSET \
1785 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1786 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1787 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1788 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1790 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1791 as -msse4.1 -msse4.2. -mno-sse4 should the same as -mno-sse4.1. */
1792 #define OPTION_MASK_ISA_SSE4 \
1793 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1794 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1796 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1798 #define OPTION_MASK_ISA_SSE5_UNSET \
1799 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1801 /* Vectorization library interface and handlers. */
1802 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1803 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1805 /* Implement TARGET_HANDLE_OPTION. */
1808 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1813 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1816 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1817 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1822 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1825 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1826 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1834 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1837 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1838 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1843 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1846 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1847 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1852 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1855 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1856 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1861 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1864 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1865 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1870 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1873 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1874 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1879 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1882 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1883 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1888 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1889 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1893 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1894 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1898 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1901 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1902 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1907 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
1910 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1911 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1920 /* Sometimes certain combinations of command options do not make
1921 sense on a particular target machine. You can define a macro
1922 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1923 defined, is executed once just after all the command options have
1926 Don't use this macro to turn on various extra optimizations for
1927 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1930 override_options (void)
1933 int ix86_tune_defaulted = 0;
1934 int ix86_arch_specified = 0;
1935 unsigned int ix86_arch_mask, ix86_tune_mask;
1937 /* Comes from final.c -- no real reason to change it. */
1938 #define MAX_CODE_ALIGN 16
1942 const struct processor_costs *cost; /* Processor costs */
1943 const int align_loop; /* Default alignments. */
1944 const int align_loop_max_skip;
1945 const int align_jump;
1946 const int align_jump_max_skip;
1947 const int align_func;
1949 const processor_target_table[PROCESSOR_max] =
1951 {&i386_cost, 4, 3, 4, 3, 4},
1952 {&i486_cost, 16, 15, 16, 15, 16},
1953 {&pentium_cost, 16, 7, 16, 7, 16},
1954 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1955 {&geode_cost, 0, 0, 0, 0, 0},
1956 {&k6_cost, 32, 7, 32, 7, 32},
1957 {&athlon_cost, 16, 7, 16, 7, 16},
1958 {&pentium4_cost, 0, 0, 0, 0, 0},
1959 {&k8_cost, 16, 7, 16, 7, 16},
1960 {&nocona_cost, 0, 0, 0, 0, 0},
1961 {&core2_cost, 16, 10, 16, 10, 16},
1962 {&generic32_cost, 16, 7, 16, 7, 16},
1963 {&generic64_cost, 16, 10, 16, 10, 16},
1964 {&amdfam10_cost, 32, 24, 32, 7, 32}
1967 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1998 PTA_PREFETCH_SSE = 1 << 4,
2000 PTA_3DNOW_A = 1 << 6,
2004 PTA_POPCNT = 1 << 10,
2006 PTA_SSE4A = 1 << 12,
2007 PTA_NO_SAHF = 1 << 13,
2008 PTA_SSE4_1 = 1 << 14,
2009 PTA_SSE4_2 = 1 << 15,
2015 const char *const name; /* processor name or nickname. */
2016 const enum processor_type processor;
2017 const unsigned /*enum pta_flags*/ flags;
2019 const processor_alias_table[] =
2021 {"i386", PROCESSOR_I386, 0},
2022 {"i486", PROCESSOR_I486, 0},
2023 {"i586", PROCESSOR_PENTIUM, 0},
2024 {"pentium", PROCESSOR_PENTIUM, 0},
2025 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2026 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2027 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2028 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2029 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2030 {"i686", PROCESSOR_PENTIUMPRO, 0},
2031 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2032 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2033 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2034 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2035 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2036 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2037 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2038 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2039 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2040 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2041 | PTA_CX16 | PTA_NO_SAHF)},
2042 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2043 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2046 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2047 |PTA_PREFETCH_SSE)},
2048 {"k6", PROCESSOR_K6, PTA_MMX},
2049 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2050 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2051 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2052 | PTA_PREFETCH_SSE)},
2053 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2054 | PTA_PREFETCH_SSE)},
2055 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2057 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2059 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2061 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2062 | PTA_MMX | PTA_SSE | PTA_SSE2
2064 {"k8", PROCESSOR_K8, (PTA_64BIT
2065 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2066 | PTA_SSE | PTA_SSE2
2068 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2069 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2070 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2072 {"opteron", PROCESSOR_K8, (PTA_64BIT
2073 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2074 | PTA_SSE | PTA_SSE2
2076 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2077 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2078 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2080 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2081 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2082 | PTA_SSE | PTA_SSE2
2084 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2085 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2086 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2088 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2089 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2090 | PTA_SSE | PTA_SSE2
2092 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2093 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2094 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2096 | PTA_CX16 | PTA_ABM)},
2097 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2098 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2099 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2101 | PTA_CX16 | PTA_ABM)},
2102 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2103 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2106 int const pta_size = ARRAY_SIZE (processor_alias_table);
2108 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2109 SUBTARGET_OVERRIDE_OPTIONS;
2112 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2113 SUBSUBTARGET_OVERRIDE_OPTIONS;
2116 /* -fPIC is the default for x86_64. */
2117 if (TARGET_MACHO && TARGET_64BIT)
2120 /* Set the default values for switches whose default depends on TARGET_64BIT
2121 in case they weren't overwritten by command line options. */
2124 /* Mach-O doesn't support omitting the frame pointer for now. */
2125 if (flag_omit_frame_pointer == 2)
2126 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2127 if (flag_asynchronous_unwind_tables == 2)
2128 flag_asynchronous_unwind_tables = 1;
2129 if (flag_pcc_struct_return == 2)
2130 flag_pcc_struct_return = 0;
2134 if (flag_omit_frame_pointer == 2)
2135 flag_omit_frame_pointer = 0;
2136 if (flag_asynchronous_unwind_tables == 2)
2137 flag_asynchronous_unwind_tables = 0;
2138 if (flag_pcc_struct_return == 2)
2139 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2142 /* Need to check -mtune=generic first. */
2143 if (ix86_tune_string)
2145 if (!strcmp (ix86_tune_string, "generic")
2146 || !strcmp (ix86_tune_string, "i686")
2147 /* As special support for cross compilers we read -mtune=native
2148 as -mtune=generic. With native compilers we won't see the
2149 -mtune=native, as it was changed by the driver. */
2150 || !strcmp (ix86_tune_string, "native"))
2153 ix86_tune_string = "generic64";
2155 ix86_tune_string = "generic32";
2157 else if (!strncmp (ix86_tune_string, "generic", 7))
2158 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2162 if (ix86_arch_string)
2163 ix86_tune_string = ix86_arch_string;
2164 if (!ix86_tune_string)
2166 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2167 ix86_tune_defaulted = 1;
2170 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2171 need to use a sensible tune option. */
2172 if (!strcmp (ix86_tune_string, "generic")
2173 || !strcmp (ix86_tune_string, "x86-64")
2174 || !strcmp (ix86_tune_string, "i686"))
2177 ix86_tune_string = "generic64";
2179 ix86_tune_string = "generic32";
2182 if (ix86_stringop_string)
2184 if (!strcmp (ix86_stringop_string, "rep_byte"))
2185 stringop_alg = rep_prefix_1_byte;
2186 else if (!strcmp (ix86_stringop_string, "libcall"))
2187 stringop_alg = libcall;
2188 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2189 stringop_alg = rep_prefix_4_byte;
2190 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2191 stringop_alg = rep_prefix_8_byte;
2192 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2193 stringop_alg = loop_1_byte;
2194 else if (!strcmp (ix86_stringop_string, "loop"))
2195 stringop_alg = loop;
2196 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2197 stringop_alg = unrolled_loop;
2199 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2201 if (!strcmp (ix86_tune_string, "x86-64"))
2202 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2203 "-mtune=generic instead as appropriate.");
2205 if (!ix86_arch_string)
2206 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2208 ix86_arch_specified = 1;
2210 if (!strcmp (ix86_arch_string, "generic"))
2211 error ("generic CPU can be used only for -mtune= switch");
2212 if (!strncmp (ix86_arch_string, "generic", 7))
2213 error ("bad value (%s) for -march= switch", ix86_arch_string);
2215 if (ix86_cmodel_string != 0)
2217 if (!strcmp (ix86_cmodel_string, "small"))
2218 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2219 else if (!strcmp (ix86_cmodel_string, "medium"))
2220 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2221 else if (!strcmp (ix86_cmodel_string, "large"))
2222 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2224 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2225 else if (!strcmp (ix86_cmodel_string, "32"))
2226 ix86_cmodel = CM_32;
2227 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2228 ix86_cmodel = CM_KERNEL;
2230 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2234 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2235 use of rip-relative addressing. This eliminates fixups that
2236 would otherwise be needed if this object is to be placed in a
2237 DLL, and is essentially just as efficient as direct addressing. */
2238 if (TARGET_64BIT_MS_ABI)
2239 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2240 else if (TARGET_64BIT)
2241 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2243 ix86_cmodel = CM_32;
2245 if (ix86_asm_string != 0)
2248 && !strcmp (ix86_asm_string, "intel"))
2249 ix86_asm_dialect = ASM_INTEL;
2250 else if (!strcmp (ix86_asm_string, "att"))
2251 ix86_asm_dialect = ASM_ATT;
2253 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2255 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2256 error ("code model %qs not supported in the %s bit mode",
2257 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2258 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2259 sorry ("%i-bit mode not compiled in",
2260 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2262 for (i = 0; i < pta_size; i++)
2263 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2265 ix86_arch = processor_alias_table[i].processor;
2266 /* Default cpu tuning to the architecture. */
2267 ix86_tune = ix86_arch;
2269 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2270 error ("CPU you selected does not support x86-64 "
2273 if (processor_alias_table[i].flags & PTA_MMX
2274 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2275 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2276 if (processor_alias_table[i].flags & PTA_3DNOW
2277 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2278 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2279 if (processor_alias_table[i].flags & PTA_3DNOW_A
2280 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2281 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2282 if (processor_alias_table[i].flags & PTA_SSE
2283 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2284 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2285 if (processor_alias_table[i].flags & PTA_SSE2
2286 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2287 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2288 if (processor_alias_table[i].flags & PTA_SSE3
2289 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2290 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2291 if (processor_alias_table[i].flags & PTA_SSSE3
2292 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2293 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2294 if (processor_alias_table[i].flags & PTA_SSE4_1
2295 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2296 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2297 if (processor_alias_table[i].flags & PTA_SSE4_2
2298 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2299 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2300 if (processor_alias_table[i].flags & PTA_SSE4A
2301 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2302 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2303 if (processor_alias_table[i].flags & PTA_SSE5
2304 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2305 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2307 if (processor_alias_table[i].flags & PTA_ABM)
2309 if (processor_alias_table[i].flags & PTA_CX16)
2310 x86_cmpxchg16b = true;
2311 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2313 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2314 x86_prefetch_sse = true;
2315 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2322 error ("bad value (%s) for -march= switch", ix86_arch_string);
2324 ix86_arch_mask = 1u << ix86_arch;
2325 for (i = 0; i < X86_ARCH_LAST; ++i)
2326 ix86_arch_features[i] &= ix86_arch_mask;
2328 for (i = 0; i < pta_size; i++)
2329 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2331 ix86_tune = processor_alias_table[i].processor;
2332 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2334 if (ix86_tune_defaulted)
2336 ix86_tune_string = "x86-64";
2337 for (i = 0; i < pta_size; i++)
2338 if (! strcmp (ix86_tune_string,
2339 processor_alias_table[i].name))
2341 ix86_tune = processor_alias_table[i].processor;
2344 error ("CPU you selected does not support x86-64 "
2347 /* Intel CPUs have always interpreted SSE prefetch instructions as
2348 NOPs; so, we can enable SSE prefetch instructions even when
2349 -mtune (rather than -march) points us to a processor that has them.
2350 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2351 higher processors. */
2353 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2354 x86_prefetch_sse = true;
2358 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2360 ix86_tune_mask = 1u << ix86_tune;
2361 for (i = 0; i < X86_TUNE_LAST; ++i)
2362 ix86_tune_features[i] &= ix86_tune_mask;
2365 ix86_cost = &size_cost;
2367 ix86_cost = processor_target_table[ix86_tune].cost;
2369 /* Arrange to set up i386_stack_locals for all functions. */
2370 init_machine_status = ix86_init_machine_status;
2372 /* Validate -mregparm= value. */
2373 if (ix86_regparm_string)
2376 warning (0, "-mregparm is ignored in 64-bit mode");
2377 i = atoi (ix86_regparm_string);
2378 if (i < 0 || i > REGPARM_MAX)
2379 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2384 ix86_regparm = REGPARM_MAX;
2386 /* If the user has provided any of the -malign-* options,
2387 warn and use that value only if -falign-* is not set.
2388 Remove this code in GCC 3.2 or later. */
2389 if (ix86_align_loops_string)
2391 warning (0, "-malign-loops is obsolete, use -falign-loops");
2392 if (align_loops == 0)
2394 i = atoi (ix86_align_loops_string);
2395 if (i < 0 || i > MAX_CODE_ALIGN)
2396 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2398 align_loops = 1 << i;
2402 if (ix86_align_jumps_string)
2404 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2405 if (align_jumps == 0)
2407 i = atoi (ix86_align_jumps_string);
2408 if (i < 0 || i > MAX_CODE_ALIGN)
2409 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2411 align_jumps = 1 << i;
2415 if (ix86_align_funcs_string)
2417 warning (0, "-malign-functions is obsolete, use -falign-functions");
2418 if (align_functions == 0)
2420 i = atoi (ix86_align_funcs_string);
2421 if (i < 0 || i > MAX_CODE_ALIGN)
2422 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2424 align_functions = 1 << i;
2428 /* Default align_* from the processor table. */
2429 if (align_loops == 0)
2431 align_loops = processor_target_table[ix86_tune].align_loop;
2432 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2434 if (align_jumps == 0)
2436 align_jumps = processor_target_table[ix86_tune].align_jump;
2437 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2439 if (align_functions == 0)
2441 align_functions = processor_target_table[ix86_tune].align_func;
2444 /* Validate -mbranch-cost= value, or provide default. */
2445 ix86_branch_cost = ix86_cost->branch_cost;
2446 if (ix86_branch_cost_string)
2448 i = atoi (ix86_branch_cost_string);
2450 error ("-mbranch-cost=%d is not between 0 and 5", i);
2452 ix86_branch_cost = i;
2454 if (ix86_section_threshold_string)
2456 i = atoi (ix86_section_threshold_string);
2458 error ("-mlarge-data-threshold=%d is negative", i);
2460 ix86_section_threshold = i;
2463 if (ix86_tls_dialect_string)
2465 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2466 ix86_tls_dialect = TLS_DIALECT_GNU;
2467 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2468 ix86_tls_dialect = TLS_DIALECT_GNU2;
2469 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2470 ix86_tls_dialect = TLS_DIALECT_SUN;
2472 error ("bad value (%s) for -mtls-dialect= switch",
2473 ix86_tls_dialect_string);
2476 if (ix87_precision_string)
2478 i = atoi (ix87_precision_string);
2479 if (i != 32 && i != 64 && i != 80)
2480 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2485 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2487 /* Enable by default the SSE and MMX builtins. Do allow the user to
2488 explicitly disable any of these. In particular, disabling SSE and
2489 MMX for kernel code is extremely useful. */
2490 if (!ix86_arch_specified)
2492 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2493 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2496 warning (0, "-mrtd is ignored in 64bit mode");
2500 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2502 if (!ix86_arch_specified)
2504 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2506 /* i386 ABI does not specify red zone. It still makes sense to use it
2507 when programmer takes care to stack from being destroyed. */
2508 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2509 target_flags |= MASK_NO_RED_ZONE;
2512 /* Keep nonleaf frame pointers. */
2513 if (flag_omit_frame_pointer)
2514 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2515 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2516 flag_omit_frame_pointer = 1;
2518 /* If we're doing fast math, we don't care about comparison order
2519 wrt NaNs. This lets us use a shorter comparison sequence. */
2520 if (flag_finite_math_only)
2521 target_flags &= ~MASK_IEEE_FP;
2523 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2524 since the insns won't need emulation. */
2525 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2526 target_flags &= ~MASK_NO_FANCY_MATH_387;
2528 /* Likewise, if the target doesn't have a 387, or we've specified
2529 software floating point, don't use 387 inline intrinsics. */
2531 target_flags |= MASK_NO_FANCY_MATH_387;
2533 /* Turn on SSE4A bultins for -msse5. */
2535 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2537 /* Turn on SSE4.1 builtins for -msse4.2. */
2539 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2541 /* Turn on SSSE3 builtins for -msse4.1. */
2543 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2545 /* Turn on SSE3 builtins for -mssse3. */
2547 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2549 /* Turn on SSE3 builtins for -msse4a. */
2551 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2553 /* Turn on SSE2 builtins for -msse3. */
2555 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2557 /* Turn on SSE builtins for -msse2. */
2559 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2561 /* Turn on MMX builtins for -msse. */
2564 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2565 x86_prefetch_sse = true;
2568 /* Turn on MMX builtins for 3Dnow. */
2570 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2572 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2573 if (TARGET_SSE4_2 || TARGET_ABM)
2576 /* Validate -mpreferred-stack-boundary= value, or provide default.
2577 The default of 128 bits is for Pentium III's SSE __m128. We can't
2578 change it because of optimize_size. Otherwise, we can't mix object
2579 files compiled with -Os and -On. */
2580 ix86_preferred_stack_boundary = 128;
2581 if (ix86_preferred_stack_boundary_string)
2583 i = atoi (ix86_preferred_stack_boundary_string);
2584 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2585 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2586 TARGET_64BIT ? 4 : 2);
2588 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2591 /* Accept -msseregparm only if at least SSE support is enabled. */
2592 if (TARGET_SSEREGPARM
2594 error ("-msseregparm used without SSE enabled");
2596 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2597 if (ix86_fpmath_string != 0)
2599 if (! strcmp (ix86_fpmath_string, "387"))
2600 ix86_fpmath = FPMATH_387;
2601 else if (! strcmp (ix86_fpmath_string, "sse"))
2605 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2606 ix86_fpmath = FPMATH_387;
2609 ix86_fpmath = FPMATH_SSE;
2611 else if (! strcmp (ix86_fpmath_string, "387,sse")
2612 || ! strcmp (ix86_fpmath_string, "sse,387"))
2616 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2617 ix86_fpmath = FPMATH_387;
2619 else if (!TARGET_80387)
2621 warning (0, "387 instruction set disabled, using SSE arithmetics");
2622 ix86_fpmath = FPMATH_SSE;
2625 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2628 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2631 /* If the i387 is disabled, then do not return values in it. */
2633 target_flags &= ~MASK_FLOAT_RETURNS;
2635 /* Use external vectorized library in vectorizing intrinsics. */
2636 if (ix86_veclibabi_string)
2638 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2639 ix86_veclib_handler = ix86_veclibabi_acml;
2641 error ("unknown vectorization library ABI type (%s) for "
2642 "-mveclibabi= switch", ix86_veclibabi_string);
2645 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2646 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2648 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2650 /* ??? Unwind info is not correct around the CFG unless either a frame
2651 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2652 unwind info generation to be aware of the CFG and propagating states
2654 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2655 || flag_exceptions || flag_non_call_exceptions)
2656 && flag_omit_frame_pointer
2657 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2659 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2660 warning (0, "unwind tables currently require either a frame pointer "
2661 "or -maccumulate-outgoing-args for correctness");
2662 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2665 /* For sane SSE instruction set generation we need fcomi instruction.
2666 It is safe to enable all CMOVE instructions. */
2670 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2673 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2674 p = strchr (internal_label_prefix, 'X');
2675 internal_label_prefix_len = p - internal_label_prefix;
2679 /* When scheduling description is not available, disable scheduler pass
2680 so it won't slow down the compilation and make x87 code slower. */
2681 if (!TARGET_SCHEDULE)
2682 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2684 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2685 set_param_value ("simultaneous-prefetches",
2686 ix86_cost->simultaneous_prefetches);
2687 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2688 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2689 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2690 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2691 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2692 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2694 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2695 can be optimized to ap = __builtin_next_arg (0). */
2696 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2697 targetm.expand_builtin_va_start = NULL;
2700 /* Return true if this goes in large data/bss. */
2703 ix86_in_large_data_p (tree exp)
2705 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2708 /* Functions are never large data. */
2709 if (TREE_CODE (exp) == FUNCTION_DECL)
2712 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2714 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2715 if (strcmp (section, ".ldata") == 0
2716 || strcmp (section, ".lbss") == 0)
2722 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2724 /* If this is an incomplete type with size 0, then we can't put it
2725 in data because it might be too big when completed. */
2726 if (!size || size > ix86_section_threshold)
2733 /* Switch to the appropriate section for output of DECL.
2734 DECL is either a `VAR_DECL' node or a constant of some sort.
2735 RELOC indicates whether forming the initial value of DECL requires
2736 link-time relocations. */
2738 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2742 x86_64_elf_select_section (tree decl, int reloc,
2743 unsigned HOST_WIDE_INT align)
2745 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2746 && ix86_in_large_data_p (decl))
2748 const char *sname = NULL;
2749 unsigned int flags = SECTION_WRITE;
2750 switch (categorize_decl_for_section (decl, reloc))
2755 case SECCAT_DATA_REL:
2756 sname = ".ldata.rel";
2758 case SECCAT_DATA_REL_LOCAL:
2759 sname = ".ldata.rel.local";
2761 case SECCAT_DATA_REL_RO:
2762 sname = ".ldata.rel.ro";
2764 case SECCAT_DATA_REL_RO_LOCAL:
2765 sname = ".ldata.rel.ro.local";
2769 flags |= SECTION_BSS;
2772 case SECCAT_RODATA_MERGE_STR:
2773 case SECCAT_RODATA_MERGE_STR_INIT:
2774 case SECCAT_RODATA_MERGE_CONST:
2778 case SECCAT_SRODATA:
2785 /* We don't split these for medium model. Place them into
2786 default sections and hope for best. */
2791 /* We might get called with string constants, but get_named_section
2792 doesn't like them as they are not DECLs. Also, we need to set
2793 flags in that case. */
2795 return get_section (sname, flags, NULL);
2796 return get_named_section (decl, sname, reloc);
2799 return default_elf_select_section (decl, reloc, align);
2802 /* Build up a unique section name, expressed as a
2803 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2804 RELOC indicates whether the initial value of EXP requires
2805 link-time relocations. */
/* Assign a unique, possibly link-once, section name to DECL.  For the
   x86-64 medium code models, large-data objects get an ".l"-prefixed
   section (.ldata / .lbss / .lrodata) so they are placed outside the
   small-data area; otherwise fall through to the generic ELF handling.
   NOTE(review): this listing elides lines (see gaps in the embedded
   line numbers) — braces, case labels, and breaks are missing here.  */
2807 static void ATTRIBUTE_UNUSED
2808 x86_64_elf_unique_section (tree decl, int reloc)
2810   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2811       && ix86_in_large_data_p (decl))
2813       const char *prefix = NULL;
2814       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2815       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2817       switch (categorize_decl_for_section (decl, reloc))
2820 	case SECCAT_DATA_REL:
2821 	case SECCAT_DATA_REL_LOCAL:
2822 	case SECCAT_DATA_REL_RO:
2823 	case SECCAT_DATA_REL_RO_LOCAL:
/* All writable-data categories share the .ldata prefix.  */
2824 	  prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2827 	  prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2830 	case SECCAT_RODATA_MERGE_STR:
2831 	case SECCAT_RODATA_MERGE_STR_INIT:
2832 	case SECCAT_RODATA_MERGE_CONST:
2833 	  prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2835 	case SECCAT_SRODATA:
2842 	  /* We don't split these for medium model.  Place them into
2843 	     default sections and hope for best.  */
/* Build "<prefix><stripped assembler name>" in a stack buffer and
   record it as the decl's section name.  */
2851 	  plen = strlen (prefix);
2853 	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2854 	  name = targetm.strip_name_encoding (name);
2855 	  nlen = strlen (name);
2857 	  string = (char *) alloca (nlen + plen + 1);
2858 	  memcpy (string, prefix, plen);
2859 	  memcpy (string + plen, name, nlen + 1);
2861 	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
/* Not large data (or not medium model): use the generic ELF scheme.  */
2865   default_unique_section (decl, reloc);
2868 #ifdef COMMON_ASM_OP
2869 /* This says how to output assembler code to declare an
2870 uninitialized external linkage data object.
2872 For medium model x86-64 we need to use .largecomm opcode for
/* Emit the assembler directive declaring an uninitialized common symbol
   NAME of SIZE bytes.  For the medium code models, objects above
   ix86_section_threshold use ".largecomm" instead of COMMON_ASM_OP so
   they land in large-data space.  NOTE(review): the "else" between the
   two fprintf calls is among the elided lines of this listing.  */
2875 x86_elf_aligned_common (FILE *file,
2876 			const char *name, unsigned HOST_WIDE_INT size,
2879   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2880       && size > (unsigned int)ix86_section_threshold)
2881     fprintf (file, ".largecomm\t");
2883     fprintf (file, "%s", COMMON_ASM_OP);
2884   assemble_name (file, name);
/* Directive operands: ",<size>,<alignment in bytes>".  */
2885   fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2886 	   size, align / BITS_PER_UNIT);
2890 /* Utility function for targets to use in implementing
2891 ASM_OUTPUT_ALIGNED_BSS. */
/* Output an aligned BSS object NAME of SIZE bytes: pick the .lbss
   section for large objects under the medium code models, otherwise the
   regular bss section, then align, label, and reserve the space.  */
2894 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2895 			const char *name, unsigned HOST_WIDE_INT size,
2898   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2899       && size > (unsigned int)ix86_section_threshold)
2900     switch_to_section (get_named_section (decl, ".lbss", 0));
2902     switch_to_section (bss_section);
2903   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2904 #ifdef ASM_DECLARE_OBJECT_NAME
2905   last_assemble_variable_decl = decl;
2906   ASM_DECLARE_OBJECT_NAME (file, name, decl);
2908   /* Standard thing is just output label for the object.  */
2909   ASM_OUTPUT_LABEL (file, name);
2910 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label is not aliased to the next.  */
2911   ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Per-target hook run when -O LEVEL is processed, before command-line
   options are parsed: seed i386-specific optimization defaults.  The
   flags set to 2 are sentinels meaning "not set by the user"; the real
   defaults are chosen later once TARGET_64BIT is known.  */
2915 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2917   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2918      make the problem with not enough registers even worse.  */
2919 #ifdef INSN_SCHEDULING
2921     flag_schedule_insns = 0;
2925   /* The Darwin libraries never set errno, so we might as well
2926      avoid calling them when that's the only reason we would.  */
2927   flag_errno_math = 0;
2929   /* The default values of these switches depend on the TARGET_64BIT
2930      that is not known at this moment.  Mark these values with 2 and
2931      let user the to override these.  In case there is no command line option
2932      specifying them, we will set the defaults in override_options.  */
2934   flag_omit_frame_pointer = 2;
2935   flag_pcc_struct_return = 2;
2936   flag_asynchronous_unwind_tables = 2;
2937   flag_vect_cost_model = 1;
2938 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
/* Allow OS/subtarget headers to adjust the defaults further.  */
2939   SUBTARGET_OPTIMIZATION_OPTIONS;
2943 /* Decide whether we can make a sibling call to a function. DECL is the
2944 declaration of the function being targeted by the call and EXP is the
2945 CALL_EXPR representing the call. */
/* Decide whether a sibling (tail) call to the function DECL / call
   expression EXP is legal.  Returns false whenever the callee's return
   ABI, PIC register requirements, register-parameter usage, or stack
   realignment would make a jump-based call unsafe.  NOTE(review): the
   "return false;" statements after each rejecting test are elided in
   this listing.  */
2948 ix86_function_ok_for_sibcall (tree decl, tree exp)
2953   /* If we are generating position-independent code, we cannot sibcall
2954      optimize any indirect call, or a direct call to a global function,
2955      as the PLT requires %ebx be live.  */
2956   if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Find the callee's function type through the call-expression's
   pointer type.  */
2963   func = TREE_TYPE (CALL_EXPR_FN (exp));
2964   if (POINTER_TYPE_P (func))
2965     func = TREE_TYPE (func);
2968   /* Check that the return value locations are the same.  Like
2969      if we are returning floats on the 80387 register stack, we cannot
2970      make a sibcall from a function that doesn't return a float to a
2971      function that does or, conversely, from a function that does return
2972      a float to a function that doesn't; the necessary stack adjustment
2973      would not be executed.  This is also the place we notice
2974      differences in the return value ABI.  Note that it is ok for one
2975      of the functions to have void return type as long as the return
2976      value of the other is passed in a register.  */
2977   a = ix86_function_value (TREE_TYPE (exp), func, false);
2978   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
/* x87 stack returns must match exactly; otherwise void return in the
   caller is tolerated, and anything else must be rtx-identical.  */
2980   if (STACK_REG_P (a) || STACK_REG_P (b))
2982       if (!rtx_equal_p (a, b))
2985   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2987   else if (!rtx_equal_p (a, b))
2990   /* If this call is indirect, we'll need to be able to use a call-clobbered
2991      register for the address of the target function.  Make sure that all
2992      such registers are not used for passing parameters.  */
2993   if (!decl && !TARGET_64BIT)
2997       /* We're looking at the CALL_EXPR, we need the type of the function.  */
2998       type = CALL_EXPR_FN (exp);		/* pointer expression */
2999       type = TREE_TYPE (type);			/* pointer type */
3000       type = TREE_TYPE (type);			/* function type */
3002       if (ix86_function_regparm (type, NULL) >= 3)
3004 	  /* ??? Need to count the actual number of registers to be used,
3005 	     not the possible number of registers.  Fix later.  */
3010   /* Dllimport'd functions are also called indirectly.  */
3011   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3012       && decl && DECL_DLLIMPORT_P (decl)
3013       && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3016   /* If we forced aligned the stack, then sibcalling would unalign the
3017      stack, which may break the called function.  */
3018   if (cfun->machine->force_align_arg_pointer)
3021   /* Otherwise okay.  That also includes certain types of indirect calls.  */
3025 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3026 calling convention attributes;
3027 arguments as in struct attribute_spec.handler. */
/* Attribute handler for the i386 calling-convention attributes
   ("cdecl", "stdcall", "fastcall", "regparm", "sseregparm").  Validates
   the attribute's placement and its argument, and diagnoses mutually
   exclusive combinations; sets *no_add_attrs to drop a rejected
   attribute.  NOTE(review): returns and several condition lines are
   elided in this listing — e.g. the 64-bit branch guarding the
   "attribute ignored" warning is not visible.  */
3030 ix86_handle_cconv_attribute (tree *node, tree name,
3032 			     int flags ATTRIBUTE_UNUSED,
3035   if (TREE_CODE (*node) != FUNCTION_TYPE
3036       && TREE_CODE (*node) != METHOD_TYPE
3037       && TREE_CODE (*node) != FIELD_DECL
3038       && TREE_CODE (*node) != TYPE_DECL)
3040       warning (OPT_Wattributes, "%qs attribute only applies to functions",
3041 	       IDENTIFIER_POINTER (name));
3042       *no_add_attrs = true;
3046   /* Can combine regparm with all attributes but fastcall.  */
3047   if (is_attribute_p ("regparm", name))
3051       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3053 	  error ("fastcall and regparm attributes are not compatible");
/* regparm takes one argument: the register count, which must be an
   integer constant no larger than REGPARM_MAX.  */
3056       cst = TREE_VALUE (args);
3057       if (TREE_CODE (cst) != INTEGER_CST)
3059 	  warning (OPT_Wattributes,
3060 		   "%qs attribute requires an integer constant argument",
3061 		   IDENTIFIER_POINTER (name));
3062 	  *no_add_attrs = true;
3064       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3066 	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3067 		   IDENTIFIER_POINTER (name), REGPARM_MAX);
3068 	  *no_add_attrs = true;
/* Functions that force stack realignment clobber a register in the
   prologue, so they get one fewer regparm register.  */
3072 	  && lookup_attribute (ix86_force_align_arg_pointer_string,
3073 			       TYPE_ATTRIBUTES (*node))
3074 	  && compare_tree_int (cst, REGPARM_MAX-1))
3076 	  error ("%s functions limited to %d register parameters",
3077 		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3085       /* Do not warn when emulating the MS ABI.  */
3086       if (!TARGET_64BIT_MS_ABI)
3087 	warning (OPT_Wattributes, "%qs attribute ignored",
3088 		 IDENTIFIER_POINTER (name));
3089       *no_add_attrs = true;
3093   /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
3094   if (is_attribute_p ("fastcall", name))
3096       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3098 	  error ("fastcall and cdecl attributes are not compatible");
3100       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3102 	  error ("fastcall and stdcall attributes are not compatible");
3104       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3106 	  error ("fastcall and regparm attributes are not compatible");
3110   /* Can combine stdcall with fastcall (redundant), regparm and
3112   else if (is_attribute_p ("stdcall", name))
3114       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3116 	  error ("stdcall and cdecl attributes are not compatible");
3118       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3120 	  error ("stdcall and fastcall attributes are not compatible");
3124   /* Can combine cdecl with regparm and sseregparm.  */
3125   else if (is_attribute_p ("cdecl", name))
3127       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3129 	  error ("stdcall and cdecl attributes are not compatible");
3131       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3133 	  error ("fastcall and cdecl attributes are not compatible");
3137   /* Can combine sseregparm with all attributes.  */
3142 /* Return 0 if the attributes for two types are incompatible, 1 if they
3143 are compatible, and 2 if they are nearly compatible (which causes a
3144 warning to be generated). */
/* TARGET_COMP_TYPE_ATTRIBUTES hook: compare the calling-convention
   attributes of TYPE1 and TYPE2.  Non-function types trivially match;
   any fastcall/regparm/sseregparm/stdcall-vs-cdecl mismatch makes the
   types incompatible.  NOTE(review): the return statements between the
   visible tests are elided in this listing.  */
3147 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3149   /* Check for mismatch of non-default calling convention.  */
3150   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3152   if (TREE_CODE (type1) != FUNCTION_TYPE
3153       && TREE_CODE (type1) != METHOD_TYPE)
3156   /* Check for mismatched fastcall/regparm types.  */
3157   if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3158        != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3159       || (ix86_function_regparm (type1, NULL)
3160 	  != ix86_function_regparm (type2, NULL)))
3163   /* Check for mismatched sseregparm types.  */
3164   if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3165       != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3168   /* Check for mismatched return types (cdecl vs stdcall).  */
3169   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3170       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3176 /* Return the regparm value for a function with the indicated TYPE and DECL.
3177 DECL may be NULL when calling function indirectly
3178 or considering a libcall. */
/* Compute how many integer registers are used to pass arguments to a
   function of TYPE / DECL.  Starts from the -mregparm default, then
   honors the "regparm" and "fastcall" attributes, and finally bumps the
   count for provably-local functions whose callers are all visible.  */
3181 ix86_function_regparm (const_tree type, const_tree decl)
3184   int regparm = ix86_regparm;
/* An explicit regparm(N) attribute wins outright.  */
3189   attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3191     return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3193   if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3196   /* Use register calling convention for local functions when possible.  */
3197   if (decl && TREE_CODE (decl) == FUNCTION_DECL
3198       && flag_unit_at_a_time && !profile_flag)
3200       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
3201       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3204 	  int local_regparm, globals = 0, regno;
3207 	  /* Make sure no regparm register is taken by a
3208 	     fixed register variable.  */
3209 	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3210 	    if (fixed_regs[local_regparm])
3213 	  /* We can't use regparm(3) for nested functions as these use
3214 	     static chain pointer in third argument.  */
3215 	  if (local_regparm == 3
3216 	      && (decl_function_context (decl)
3217 		  || ix86_force_align_arg_pointer)
3218 	      && !DECL_NO_STATIC_CHAIN (decl))
3221 	  /* If the function realigns its stackpointer, the prologue will
3222 	     clobber %ecx.  If we've already generated code for the callee,
3223 	     the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3224 	     scanning the attributes for the self-realigning property.  */
3225 	  f = DECL_STRUCT_FUNCTION (decl);
3226 	  if (local_regparm == 3
3227 	      && (f ? !!f->machine->force_align_arg_pointer
3228 		  : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3229 					TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3232 	  /* Each fixed register usage increases register pressure,
3233 	     so less registers should be used for argument passing.
3234 	     This functionality can be overriden by an explicit
3236 	  for (regno = 0; regno <= DI_REG; regno++)
3237 	    if (fixed_regs[regno])
/* Reduce the local regparm count by the number of globally fixed
   registers, never going below zero.  */
3241 	    = globals < local_regparm ? local_regparm - globals : 0;
3243 	  if (local_regparm > regparm)
3244 	    regparm = local_regparm;
3251 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3252 DFmode (2) arguments in SSE registers for a function with the
3253 indicated TYPE and DECL. DECL may be NULL when calling function
3254 indirectly or considering a libcall. Otherwise return 0. */
/* Return 2 if SFmode and DFmode arguments go in SSE registers, 1 for
   SFmode only, 0 for neither, for a 32-bit function of TYPE / DECL.
   Triggered by -msseregparm / the "sseregparm" attribute, or implicitly
   for local functions compiled with SSE math.  WARN selects whether a
   missing-SSE situation is an error (grounded in the error calls
   below); the warning branch itself is elided from this listing.  */
3257 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This path is 32-bit only; the 64-bit ABI handles SSE args itself.  */
3259   gcc_assert (!TARGET_64BIT);
3261   /* Use SSE registers to pass SFmode and DFmode arguments if requested
3262      by the sseregparm attribute.  */
3263   if (TARGET_SSEREGPARM
3264       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3271 	      error ("Calling %qD with attribute sseregparm without "
3272 		     "SSE/SSE2 enabled", decl);
3274 	      error ("Calling %qT with attribute sseregparm without "
3275 		     "SSE/SSE2 enabled", type);
3283   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3284      (and DFmode for SSE2) arguments in SSE registers.  */
3285   if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3287       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
3288       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3290 	return TARGET_SSE2 ? 2 : 1;
3296 /* Return true if EAX is live at the start of the function. Used by
3297 ix86_expand_prologue to determine if we need special help before
3298 calling allocate_stack_worker. */
/* Return true if %eax (hard register 0) is live on entry to the current
   function, by querying dataflow liveness at the entry block.  Used by
   the prologue code before calling the stack-allocation worker.  */
3301 ix86_eax_live_at_start_p (void)
3303   /* Cheat.  Don't bother working forward from ix86_function_regparm
3304      to the function type to whether an actual argument is located in
3305      eax.  Instead just look at cfg info, which is still close enough
3306      to correct at this point.  This gives false positives for broken
3307      functions that might use uninitialized data that happens to be
3308      allocated in eax, but who cares?  */
3309   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3312 /* Value is the number of bytes of arguments automatically
3313 popped when returning from a subroutine call.
3314 FUNDECL is the declaration node of the function (as a tree),
3315 FUNTYPE is the data type of the function (as a tree),
3316 or for a library call it is an identifier node for the subroutine name.
3317 SIZE is the number of bytes of arguments passed on the stack.
3319 On the 80386, the RTD insn may be used to pop them if the number
3320 of args is fixed, but if the number is variable then the caller
3321 must pop them all. RTD can't be used for library calls now
3322 because the library is compiled with the Unix compiler.
3323 Use of RTD is a selectable option, since it is incompatible with
3324 standard Unix calling sequences. If the option is not selected,
3325 the caller must always pop the args.
3327 The attribute stdcall is equivalent to RTD on a per module basis. */
/* Number of bytes of stack arguments the callee pops on return (see the
   long comment above): 0 in 64-bit mode and for cdecl/vararg functions,
   SIZE for stdcall/fastcall and -mrtd fixed-arg functions, and the
   pointer size for a stack-passed hidden aggregate-return pointer.
   NOTE(review): the return statements for each case are elided here.  */
3330 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3334   /* None of the 64-bit ABIs pop arguments.  */
/* For libcalls FUNDECL is an IDENTIFIER_NODE, which never implies
   -mrtd popping.  */
3338   rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3340   /* Cdecl functions override -mrtd, and never pop the stack.  */
3341   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3343     /* Stdcall and fastcall functions will pop the stack if not
3345     if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3346 	|| lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3349     if (rtd && ! stdarg_p (funtype))
3353   /* Lose any fake structure return argument if it is passed on the stack.  */
3354   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3355       && !KEEP_AGGREGATE_RETURN_POINTER)
3357       int nregs = ix86_function_regparm (funtype, fundecl);
3359 	return GET_MODE_SIZE (Pmode);
3365 /* Argument support functions. */
3367 /* Return true when register may be used to pass function parameters. */
/* Return true if hard register REGNO can ever carry a function
   argument: the regparm integer registers, plus SSE and (32-bit) MMX
   parameter registers, with the register sets differing between the
   32-bit, 64-bit SysV, and 64-bit MS ABIs.  NOTE(review): the ABI
   branch structure between the visible tests is elided here.  */
3369 ix86_function_arg_regno_p (int regno)
3372   const int *parm_regs;
3377     return (regno < REGPARM_MAX
3378 	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3380     return (regno < REGPARM_MAX
3381 	    || (TARGET_MMX && MMX_REGNO_P (regno)
3382 		&& (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3383 	    || (TARGET_SSE && SSE_REGNO_P (regno)
3384 		&& (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3389     if (SSE_REGNO_P (regno) && TARGET_SSE)
3394     if (TARGET_SSE && SSE_REGNO_P (regno)
3395 	&& (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3399   /* RAX is used as hidden argument to va_arg functions.  */
3400   if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
/* Otherwise scan the ABI's integer parameter-register table.  */
3403   if (TARGET_64BIT_MS_ABI)
3404     parm_regs = x86_64_ms_abi_int_parameter_registers;
3406     parm_regs = x86_64_int_parameter_registers;
3407   for (i = 0; i < REGPARM_MAX; i++)
3408     if (regno == parm_regs[i])
3413 /* Return if we do not know how to pass TYPE solely in registers. */
/* TARGET_MUST_PASS_IN_STACK hook: true when an argument of MODE/TYPE
   cannot be passed (even partially) in registers.  Defers to the
   generic variable-size test, then additionally forces 32-bit TImode
   non-vector aggregates onto the stack.  */
3416 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3418   if (must_pass_in_stack_var_size_or_pad (mode, type))
3421   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
3422      The layout_type routine is crafty and tries to trick us into passing
3423      currently unsupported vector types on the stack by using TImode.  */
3424   return (!TARGET_64BIT && mode == TImode
3425 	  && type && TREE_CODE (type) != VECTOR_TYPE);
3428 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3429 for a call to a function whose data type is FNTYPE.
3430 For a library call, FNTYPE is 0. */
/* Initialize CUM, the cumulative argument-register bookkeeping, for a
   call to FNTYPE (NULL for a libcall named LIBNAME).  Sets the counts
   of integer/SSE/MMX parameter registers still available and records
   whether the callee may take variable arguments.  */
3433 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
3434 		      tree fntype,	/* tree ptr for function decl */
3435 		      rtx libname,	/* SYMBOL_REF of library name or 0 */
3438   struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3439   memset (cum, 0, sizeof (*cum));
3441   /* Set up the number of registers to use for passing arguments.  */
3442   cum->nregs = ix86_regparm;
3444     cum->sse_nregs = SSE_REGPARM_MAX;
3446     cum->mmx_nregs = MMX_REGPARM_MAX;
3447   cum->warn_sse = true;
3448   cum->warn_mmx = true;
3450   /* Because type might mismatch in between caller and callee, we need to
3451      use actual type of function for local calls.
3452      FIXME: cgraph_analyze can be told to actually record if function uses
3453      va_start so for local functions maybe_vaarg can be made aggressive
3455      FIXME: once typesytem is fixed, we won't need this code anymore.  */
3457     fntype = TREE_TYPE (fndecl);
/* Unprototyped or stdarg function types may take variable arguments;
   a libcall (no fntype) is assumed non-variadic only if LIBNAME says so
   — the fallback operand is elided from this listing.  */
3458   cum->maybe_vaarg = (fntype
3459 		      ? (!prototype_p (fntype) || stdarg_p (fntype))
3464       /* If there are variable arguments, then we won't pass anything
3465 	 in registers in 32-bit mode.  */
3466       if (cum->maybe_vaarg)
3476       /* Use ecx and edx registers if function has fastcall attribute,
3477 	 else look for regparm information.  */
3480 	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3486 	    cum->nregs = ix86_function_regparm (fntype, fndecl);
3489       /* Set up the number of SSE registers used for passing SFmode
3490 	 and DFmode arguments.  Warn for mismatching ABI.  */
3491       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3495 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3496 But in the case of vector types, it is some vector mode.
3498 When we have only some of our vector isa extensions enabled, then there
3499 are some modes for which vector_mode_supported_p is false. For these
3500 modes, the generic vector support in gcc will choose some non-vector mode
3501 in order to implement the type. By computing the natural mode, we'll
3502 select the proper ABI location for the operand and not depend on whatever
3503 the middle-end decides to do with these vector types. */
/* Return the "natural" machine mode for TYPE (see comment above):
   normally TYPE_MODE, but for 8- or 16-byte vector types whose mode the
   middle end lowered to a non-vector mode, search the vector modes for
   one with the matching inner mode and element count.  */
3505 static enum machine_mode
3506 type_natural_mode (const_tree type)
3508   enum machine_mode mode = TYPE_MODE (type);
3510   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3512       HOST_WIDE_INT size = int_size_in_bytes (type);
3513       if ((size == 8 || size == 16)
3514 	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
3515 	  && TYPE_VECTOR_SUBPARTS (type) > 1)
3517 	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the search at the first float or integer vector mode depending
   on the element type.  */
3519 	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3520 	    mode = MIN_MODE_VECTOR_FLOAT;
3522 	    mode = MIN_MODE_VECTOR_INT;
3524 	  /* Get the mode which has this inner mode and number of units.  */
3525 	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3526 	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3527 		&& GET_MODE_INNER (mode) == innermode)
3537 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3538 this may not agree with the mode that the type system has chosen for the
3539 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3540 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* Build the rtx representing a value passed in register REGNO with
   natural mode MODE and type-system mode ORIG_MODE: a plain REG when
   ORIG_MODE is usable, otherwise a one-element PARALLEL wrapping the
   MODE register (see comment above).  */
3543 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3548   if (orig_mode != BLKmode)
3549     tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the register in a PARALLEL at offset 0.  */
3552       tmp = gen_rtx_REG (mode, regno);
3553       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3554       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3560 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3561 of this code is to classify each 8bytes of incoming argument by the register
3562 class and assign registers accordingly. */
3564 /* Return the union class of CLASS1 and CLASS2.
3565 See the x86-64 PS ABI for details. */
/* Merge two x86-64 psABI register classes per the ABI's six merge
   rules; used when classifying the 8-byte chunks of an aggregate.
   NOTE(review): the return statements for rules #1 and #2 are elided
   from this listing.  */
3567 static enum x86_64_reg_class
3568 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3570   /* Rule #1: If both classes are equal, this is the resulting class.  */
3571   if (class1 == class2)
3574   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3576   if (class1 == X86_64_NO_CLASS)
3578   if (class2 == X86_64_NO_CLASS)
3581   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
3582   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3583     return X86_64_MEMORY_CLASS;
3585   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
/* INTEGERSI + SSESF stays INTEGERSI (both halves fit in 32 bits).  */
3586   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3587       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3588     return X86_64_INTEGERSI_CLASS;
3589   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3590       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3591     return X86_64_INTEGER_CLASS;
3593   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3595   if (class1 == X86_64_X87_CLASS
3596       || class1 == X86_64_X87UP_CLASS
3597       || class1 == X86_64_COMPLEX_X87_CLASS
3598       || class2 == X86_64_X87_CLASS
3599       || class2 == X86_64_X87UP_CLASS
3600       || class2 == X86_64_COMPLEX_X87_CLASS)
3601     return X86_64_MEMORY_CLASS;
3603   /* Rule #6: Otherwise class SSE is used.  */
3604   return X86_64_SSE_CLASS;
3607 /* Classify the argument of type TYPE and mode MODE.
3608 CLASSES will be filled by the register class used to pass each word
3609 of the operand. The number of words is returned. In case the parameter
3610 should be passed in memory, 0 is returned. As a special case for zero
3611 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3613 BIT_OFFSET is used internally for handling records and specifies offset
3614 of the offset in bits modulo 256 to avoid overflow cases.
3616 See the x86-64 PS ABI for details.
/* Classify each 8-byte chunk of an argument of MODE/TYPE into CLASSES
   per the x86-64 psABI (see comment above): aggregates are classified
   field by field and merged; scalars are classified by mode.  Returns
   the number of 8-byte words used, or 0 for "pass in memory".
   NOTE(review): this listing elides many lines — the large mode switch
   below is missing most of its case labels and breaks, so the visible
   assignments cannot be matched to modes with certainty.  */
3620 classify_argument (enum machine_mode mode, const_tree type,
3621 		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3623   HOST_WIDE_INT bytes =
3624     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 8-byte words, counting the partial word created by a
   non-zero starting bit offset.  */
3625   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3627   /* Variable sized entities are always passed/returned in memory.  */
3631   if (mode != VOIDmode
3632       && targetm.calls.must_pass_in_stack (mode, type))
3635   if (type && AGGREGATE_TYPE_P (type))
3639       enum x86_64_reg_class subclasses[MAX_CLASSES];
3641       /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
3645       for (i = 0; i < words; i++)
3646 	classes[i] = X86_64_NO_CLASS;
3648       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3649 	 signalize memory class, so handle it as special case.  */
3652 	  classes[0] = X86_64_NO_CLASS;
3656       /* Classify each field of record and merge classes.  */
3657       switch (TREE_CODE (type))
3660 	  /* And now merge the fields of structure.  */
3661 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3663 	      if (TREE_CODE (field) == FIELD_DECL)
3667 		  if (TREE_TYPE (field) == error_mark_node)
3670 		  /* Bitfields are always classified as integer.  Handle them
3671 		     early, since later code would consider them to be
3672 		     misaligned integers.  */
3673 		  if (DECL_BIT_FIELD (field))
/* Mark every word the bit-field overlaps as INTEGER.  */
3675 		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3676 			   i < ((int_bit_position (field) + (bit_offset % 64))
3677 			        + tree_low_cst (DECL_SIZE (field), 0)
3680 			  merge_classes (X86_64_INTEGER_CLASS,
/* Non-bit-field: classify the field's type recursively, with its bit
   position folded into the offset (mod 256 to avoid overflow).  */
3685 		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3686 					       TREE_TYPE (field), subclasses,
3687 					       (int_bit_position (field)
3688 						+ bit_offset) % 256);
3691 		      for (i = 0; i < num; i++)
3694 			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3696 			    merge_classes (subclasses[i], classes[i + pos]);
3704 	  /* Arrays are handled as small records.  */
3707 	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3708 				   TREE_TYPE (type), subclasses, bit_offset);
3712 	  /* The partial classes are now full classes.  */
3713 	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3714 	    subclasses[0] = X86_64_SSE_CLASS;
3715 	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3716 	    subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element's classification across all words.  */
3718 	  for (i = 0; i < words; i++)
3719 	    classes[i] = subclasses[i % num];
3724 	case QUAL_UNION_TYPE:
3725 	  /* Unions are similar to RECORD_TYPE but offset is always 0.
3727 	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3729 	      if (TREE_CODE (field) == FIELD_DECL)
3733 		  if (TREE_TYPE (field) == error_mark_node)
3736 		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3737 					   TREE_TYPE (field), subclasses,
3741 		  for (i = 0; i < num; i++)
3742 		    classes[i] = merge_classes (subclasses[i], classes[i]);
3751       /* Final merger cleanup.  */
3752       for (i = 0; i < words; i++)
3754 	  /* If one class is MEMORY, everything should be passed in
3756 	  if (classes[i] == X86_64_MEMORY_CLASS)
3759 	  /* The X86_64_SSEUP_CLASS should be always preceded by
3760 	     X86_64_SSE_CLASS.  */
3761 	  if (classes[i] == X86_64_SSEUP_CLASS
3762 	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3763 	    classes[i] = X86_64_SSE_CLASS;
3765 	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
3766 	  if (classes[i] == X86_64_X87UP_CLASS
3767 	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3768 	    classes[i] = X86_64_SSE_CLASS;
3773   /* Compute alignment needed.  We align all types to natural boundaries with
3774      exception of XFmode that is aligned to 64bits.  */
3775   if (mode != VOIDmode && mode != BLKmode)
3777       int mode_alignment = GET_MODE_BITSIZE (mode);
3780 	mode_alignment = 128;
3781       else if (mode == XCmode)
3782 	mode_alignment = 256;
3783       if (COMPLEX_MODE_P (mode))
3784 	mode_alignment /= 2;
3785       /* Misaligned fields are always returned in memory.  */
3786       if (bit_offset % mode_alignment)
3790   /* for V1xx modes, just use the base mode */
3791   if (VECTOR_MODE_P (mode)
3792       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3793     mode = GET_MODE_INNER (mode);
3795   /* Classification of atomic types.  */
3800       classes[0] = X86_64_SSE_CLASS;
3803       classes[0] = X86_64_SSE_CLASS;
3804       classes[1] = X86_64_SSEUP_CLASS;
/* Small integers: use INTEGERSI when the value fits in the low 32 bits
   of its word, so a following SSESF can merge with it (rule #4).  */
3813       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3814 	classes[0] = X86_64_INTEGERSI_CLASS;
3816 	classes[0] = X86_64_INTEGER_CLASS;
3820       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3825       if (!(bit_offset % 64))
3826 	classes[0] = X86_64_SSESF_CLASS;
3828 	classes[0] = X86_64_SSE_CLASS;
3831       classes[0] = X86_64_SSEDF_CLASS;
3834       classes[0] = X86_64_X87_CLASS;
3835       classes[1] = X86_64_X87UP_CLASS;
3838       classes[0] = X86_64_SSE_CLASS;
3839       classes[1] = X86_64_SSEUP_CLASS;
3842       classes[0] = X86_64_SSE_CLASS;
3845       classes[0] = X86_64_SSEDF_CLASS;
3846       classes[1] = X86_64_SSEDF_CLASS;
3849       classes[0] = X86_64_COMPLEX_X87_CLASS;
3852       /* This modes is larger than 16 bytes.  */
3860       classes[0] = X86_64_SSE_CLASS;
3861       classes[1] = X86_64_SSEUP_CLASS;
3867       classes[0] = X86_64_SSE_CLASS;
3873       gcc_assert (VECTOR_MODE_P (mode));
3878       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3880       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3881 	classes[0] = X86_64_INTEGERSI_CLASS;
3883 	classes[0] = X86_64_INTEGER_CLASS;
3884       classes[1] = X86_64_INTEGER_CLASS;
/* One word if <= 8 bytes, two otherwise.  */
3885       return 1 + (bytes > 8);
3889 /* Examine the argument and return set number of register required in each
3890 class. Return 0 iff parameter should be passed in memory. */
/* Count, via classify_argument, how many integer (*INT_NREGS) and SSE
   (*SSE_NREGS) registers an argument of MODE/TYPE needs.  IN_RETURN
   distinguishes return-value from argument classification (x87 classes
   are register-returnable but never register-passable).  Returns 0 iff
   the value must live in memory.  NOTE(review): the increments of
   *int_nregs / *sse_nregs and several returns are elided here.  */
3892 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3893 		  int *int_nregs, int *sse_nregs)
3895   enum x86_64_reg_class regclass[MAX_CLASSES];
3896   int n = classify_argument (mode, type, regclass, 0);
/* Tally each classified 8-byte word.  */
3902   for (n--; n >= 0; n--)
3903     switch (regclass[n])
3905       case X86_64_INTEGER_CLASS:
3906       case X86_64_INTEGERSI_CLASS:
3909       case X86_64_SSE_CLASS:
3910       case X86_64_SSESF_CLASS:
3911       case X86_64_SSEDF_CLASS:
3914       case X86_64_NO_CLASS:
3915       case X86_64_SSEUP_CLASS:
3917       case X86_64_X87_CLASS:
3918       case X86_64_X87UP_CLASS:
3922       case X86_64_COMPLEX_X87_CLASS:
/* x87 complex occupies both stack registers when returned; in an
   argument position it forces memory.  */
3923 	return in_return ? 2 : 0;
3924       case X86_64_MEMORY_CLASS:
3930 /* Construct container for the argument used by GCC interface. See
3931 FUNCTION_ARG for the detailed description. */
/* Build the rtx (REG or PARALLEL) describing where a value of
   MODE/TYPE lives across a call, given the classification from
   classify_argument and the remaining NINTREGS integer / NSSEREGS SSE
   registers.  INTREG walks the integer-register table; SSE_REGNO
   indexes the next SSE register.  Returns NULL (elided here) when the
   value goes in memory.  Also issues one-shot diagnostics when the ABI
   demands SSE/x87 registers that are disabled.  */
3934 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3935 		     const_tree type, int in_return, int nintregs, int nsseregs,
3936 		     const int *intreg, int sse_regno)
3938   /* The following variables hold the static issued_error state.  */
3939   static bool issued_sse_arg_error;
3940   static bool issued_sse_ret_error;
3941   static bool issued_x87_ret_error;
3943   enum machine_mode tmpmode;
3945     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3946   enum x86_64_reg_class regclass[MAX_CLASSES];
3950   int needed_sseregs, needed_intregs;
3951   rtx exp[MAX_CLASSES];
3954   n = classify_argument (mode, type, regclass, 0);
3957   if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of either kind: pass in memory.  */
3960   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3963   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
3964      some less clueful developer tries to use floating-point anyway.  */
3965   if (needed_sseregs && !TARGET_SSE)
3969 	  if (!issued_sse_ret_error)
3971 	      error ("SSE register return with SSE disabled");
3972 	      issued_sse_ret_error = true;
3975       else if (!issued_sse_arg_error)
3977 	  error ("SSE register argument with SSE disabled");
3978 	  issued_sse_arg_error = true;
3983   /* Likewise, error if the ABI requires us to return values in the
3984      x87 registers and the user specified -mno-80387.  */
3985   if (!TARGET_80387 && in_return)
3986     for (i = 0; i < n; i++)
3987       if (regclass[i] == X86_64_X87_CLASS
3988 	  || regclass[i] == X86_64_X87UP_CLASS
3989 	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3991 	  if (!issued_x87_ret_error)
3993 	      error ("x87 register return with x87 disabled");
3994 	      issued_x87_ret_error = true;
3999   /* First construct simple cases.  Avoid SCmode, since we want to use
4000      single register to pass this type.  */
4001   if (n == 1 && mode != SCmode)
4002     switch (regclass[0])
4004       case X86_64_INTEGER_CLASS:
4005       case X86_64_INTEGERSI_CLASS:
4006 	return gen_rtx_REG (mode, intreg[0]);
4007       case X86_64_SSE_CLASS:
4008       case X86_64_SSESF_CLASS:
4009       case X86_64_SSEDF_CLASS:
4010 	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4011       case X86_64_X87_CLASS:
4012       case X86_64_COMPLEX_X87_CLASS:
4013 	return gen_rtx_REG (mode, FIRST_STACK_REG);
4014       case X86_64_NO_CLASS:
4015 	/* Zero sized array, struct or class.  */
/* Two-word fast paths: a full 16-byte SSE value, an x87 long double,
   and an aligned 16-byte integer pair.  */
4020   if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4021       && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4022     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4025       && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4026     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4027   if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4028       && regclass[1] == X86_64_INTEGER_CLASS
4029       && (mode == CDImode || mode == TImode || mode == TFmode)
4030       && intreg[0] + 1 == intreg[1])
4031     return gen_rtx_REG (mode, intreg[0]);
4033   /* Otherwise figure out the entries of the PARALLEL.  */
4034   for (i = 0; i < n; i++)
4036       switch (regclass[i])
4038 	case X86_64_NO_CLASS:
4040 	case X86_64_INTEGER_CLASS:
4041 	case X86_64_INTEGERSI_CLASS:
4042 	  /* Merge TImodes on aligned occasions here too.  */
4043 	  if (i * 8 + 8 > bytes)
/* Trailing partial word: use the smallest integer mode that covers
   the remaining bytes.  */
4044 	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4045 	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4049 	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
4050 	  if (tmpmode == BLKmode)
4052 	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4053 					     gen_rtx_REG (tmpmode, *intreg),
4057 	case X86_64_SSESF_CLASS:
4058 	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4059 					     gen_rtx_REG (SFmode,
4060 							  SSE_REGNO (sse_regno)),
4064 	case X86_64_SSEDF_CLASS:
4065 	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4066 					     gen_rtx_REG (DFmode,
4067 							  SSE_REGNO (sse_regno)),
4071 	case X86_64_SSE_CLASS:
/* A following SSEUP word means this entry covers 16 bytes (TImode).  */
4072 	  if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4076 	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4077 					     gen_rtx_REG (tmpmode,
4078 							  SSE_REGNO (sse_regno)),
4080 	  if (tmpmode == TImode)
4089   /* Empty aligned struct, union or class.  */
/* Assemble the collected EXPR_LISTs into the final PARALLEL.  */
4093   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4094   for (i = 0; i < nexps; i++)
4095     XVECEXP (ret, 0, i) = exp [i];
4099 /* Update the data in CUM to advance over an argument of mode MODE
4100 and data type TYPE. (TYPE is null for libcalls where that information
4101 may not be available.) */
/* Advance the register-allocation state in CUM past one argument of
   MODE/TYPE under the 32-bit (ia32) calling conventions.  BYTES is the
   argument size in bytes, WORDS the size rounded up to whole words.
   NOTE(review): intervening source lines are elided in this excerpt;
   the mode-class dispatch that selects each branch below is not
   visible here.  */
4104 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4105 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-like argument: consume WORDS general registers.  */
4121 cum->words += words;
4122 cum->nregs -= words;
4123 cum->regno += words;
/* Once the register file is exhausted, nregs goes non-positive.  */
4125 if (cum->nregs <= 0)
/* float_in_sse thresholds decide whether SF/DF args use SSE regs
   (2 = also DFmode, 1 = SFmode only) — presumably; elided here.  */
4133 if (cum->float_in_sse < 2)
4136 if (cum->float_in_sse < 1)
/* SSE vector arguments: only non-aggregates consume an SSE register.  */
4147 if (!type || !AGGREGATE_TYPE_P (type))
4149 cum->sse_words += words;
4150 cum->sse_nregs -= 1;
4151 cum->sse_regno += 1;
4152 if (cum->sse_nregs <= 0)
/* MMX vector arguments: same scheme with the MMX register counters.  */
4164 if (!type || !AGGREGATE_TYPE_P (type))
4166 cum->mmx_words += words;
4167 cum->mmx_nregs -= 1;
4168 cum->mmx_regno += 1;
4169 if (cum->mmx_nregs <= 0)
/* Advance CUM past one argument under the x86-64 SysV ABI.  WORDS is
   the argument size in words.  examine_argument classifies the
   argument into integer/SSE register needs; if it cannot be passed in
   registers at all, or there are not enough registers left, the
   argument goes to the stack (words counter only).  */
4180 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4181 tree type, HOST_WIDE_INT words)
4183 int int_nregs, sse_nregs;
/* Memory-class argument: consumes stack words, no registers.  */
4185 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4186 cum->words += words;
/* Fits in the remaining registers: consume both register kinds.  */
4187 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4189 cum->nregs -= int_nregs;
4190 cum->sse_nregs -= sse_nregs;
4191 cum->regno += int_nregs;
4192 cum->sse_regno += sse_nregs;
/* Otherwise (registers exhausted) the whole argument is stacked.  */
4195 cum->words += words;
/* Advance CUM past one argument under the Microsoft x64 ABI.  Any
   value whose size is not 1, 2, 4 or 8 bytes must already have been
   converted to pass-by-reference, hence the assert.  (Register
   bookkeeping lines are elided in this excerpt.)  */
4199 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4200 HOST_WIDE_INT words)
4202 /* Otherwise, this should be passed indirect. */
4203 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4205 cum->words += words;
/* Target hook dispatcher: update CUM to advance over an argument of
   MODE and TYPE.  Computes the byte/word size once, then delegates to
   the ABI-specific worker (MS x64, SysV x86-64, or ia32).  */
4214 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4215 tree type, int named ATTRIBUTE_UNUSED)
4217 HOST_WIDE_INT bytes, words;
/* BLKmode values carry their size only in the type node.  */
4219 if (mode == BLKmode)
4220 bytes = int_size_in_bytes (type);
4222 bytes = GET_MODE_SIZE (mode);
4223 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Re-derive the natural mode (guard condition elided in excerpt).  */
4226 mode = type_natural_mode (type);
4228 if (TARGET_64BIT_MS_ABI)
4229 function_arg_advance_ms_64 (cum, bytes, words);
4230 else if (TARGET_64BIT)
4231 function_arg_advance_64 (cum, mode, type, words);
4233 function_arg_advance_32 (cum, mode, type, bytes, words);
4236 /* Define where to put the arguments to a function.
4237 Value is zero to push the argument on the stack,
4238 or a hard register in which to store the argument.
4240 MODE is the argument's machine mode.
4241 TYPE is the data type of the argument (as a tree).
4242 This is null for libcalls where that information may
4244 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4245 the preceding args and about the function being called.
4246 NAMED is nonzero if this argument is a named parameter
4247 (otherwise it is an extra parameter matching an ellipsis). */
/* Pick the register (or NULL for stack) holding an ia32 argument of
   MODE/TYPE.  Returns an rtx REG/PARALLEL, or falls through to stack
   passing.  Warns once per compilation when a vector argument needs
   SSE/MMX that is not enabled.  NOTE(review): the excerpt elides the
   mode dispatch and several branches.  */
4250 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4251 enum machine_mode orig_mode, tree type,
4252 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Each warning is emitted at most once per translation unit.  */
4254 static bool warnedsse, warnedmmx;
4256 /* Avoid the AL settings for the Unix64 ABI. */
4257 if (mode == VOIDmode)
/* Integer argument that still fits in the remaining registers.  */
4273 if (words <= cum->nregs)
4275 int regno = cum->regno;
4277 /* Fastcall allocates the first two DWORD (SImode) or
4278 smaller arguments to ECX and EDX if it isn't an
4284 || (type && AGGREGATE_TYPE_P (type)))
4287 /* ECX not EAX is the first allocated register. */
4288 if (regno == AX_REG)
4291 return gen_rtx_REG (mode, regno);
/* SF/DF in SSE registers, gated by the sseregparm level.  */
4296 if (cum->float_in_sse < 2)
4299 if (cum->float_in_sse < 1)
/* SSE vector argument (non-aggregate only).  */
4309 if (!type || !AGGREGATE_TYPE_P (type))
4311 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4314 warning (0, "SSE vector argument without SSE enabled "
4318 return gen_reg_or_parallel (mode, orig_mode,
4319 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector argument (non-aggregate only).  */
4327 if (!type || !AGGREGATE_TYPE_P (type))
4329 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4332 warning (0, "MMX vector argument without MMX enabled "
4336 return gen_reg_or_parallel (mode, orig_mode,
4337 cum->mmx_regno + FIRST_MMX_REG);
/* Pick the register(s) for an argument under the x86-64 SysV ABI by
   delegating the classification to construct_container.  A VOIDmode
   "argument" is the hidden AL value telling a varargs callee how many
   SSE registers were used (upper bound SSE_REGPARM_MAX; expression
   partially elided in this excerpt).  */
4346 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4347 enum machine_mode orig_mode, tree type)
4349 /* Handle a hidden AL argument containing number of registers
4350 for varargs x86-64 functions. */
4351 if (mode == VOIDmode)
4352 return GEN_INT (cum->maybe_vaarg
4353 ? (cum->sse_nregs < 0
/* Normal case: build a REG or PARALLEL per the psABI classification,
   starting from the next free integer parameter register.  */
4358 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4360 &x86_64_int_parameter_registers [cum->regno],
/* Pick the register for an argument under the Microsoft x64 ABI.
   Each of the first four slots maps to a fixed integer register;
   SF/DF values use the parallel-numbered SSE register instead.  An
   UNNAMED float is passed in both the SSE and integer register so a
   varargs callee can find it either way.  */
4365 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4366 enum machine_mode orig_mode, int named)
4370 /* Avoid the AL settings for the Unix64 ABI. */
4371 if (mode == VOIDmode)
4374 /* If we've run out of registers, it goes on the stack. */
4375 if (cum->nregs == 0)
/* Default: the slot's integer register.  */
4378 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4380 /* Only floating point modes are passed in anything but integer regs. */
4381 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4384 regno = cum->regno + FIRST_SSE_REG;
4389 /* Unnamed floating parameters are passed in both the
4390 SSE and integer registers. */
4391 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4392 t2 = gen_rtx_REG (mode, regno);
4393 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4394 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4395 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4399 return gen_reg_or_parallel (mode, orig_mode, regno);
/* FUNCTION_ARG worker: return the rtx (register or PARALLEL) in which
   to pass an argument of mode OMODE and TYPE, or indicate stack
   passing.  Computes sizes, canonicalizes vector types to their
   natural mode, and dispatches to the ABI-specific helper.  */
4403 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4404 tree type, int named)
4406 enum machine_mode mode = omode;
4407 HOST_WIDE_INT bytes, words;
/* BLKmode size lives in the type, not the mode.  */
4409 if (mode == BLKmode)
4410 bytes = int_size_in_bytes (type);
4412 bytes = GET_MODE_SIZE (mode);
4413 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4415 /* To simplify the code below, represent vector types with a vector mode
4416 even if MMX/SSE are not active. */
4417 if (type && TREE_CODE (type) == VECTOR_TYPE)
4418 mode = type_natural_mode (type);
4420 if (TARGET_64BIT_MS_ABI)
4421 return function_arg_ms_64 (cum, mode, omode, named);
4422 else if (TARGET_64BIT)
4423 return function_arg_64 (cum, mode, omode, type);
4425 return function_arg_32 (cum, mode, omode, type, bytes, words);
4428 /* A C expression that indicates when an argument must be passed by
4429 reference. If nonzero for an argument, a copy of that argument is
4430 made in memory and a pointer to the argument is passed instead of
4431 the argument itself. The pointer is passed in whatever way is
4432 appropriate for passing a pointer to that type. */
/* TARGET_PASS_BY_REFERENCE hook: return true when an argument of
   MODE/TYPE must be passed as a pointer to a caller-made copy.
   MS x64: arrays always; aggregates unless their size is exactly
   1/2/4/8 bytes; other values larger than 8 bytes.  SysV x86-64:
   variable-sized types (int_size_in_bytes == -1).  */
4435 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4436 enum machine_mode mode ATTRIBUTE_UNUSED,
4437 const_tree type, bool named ATTRIBUTE_UNUSED)
4439 if (TARGET_64BIT_MS_ABI)
4443 /* Arrays are passed by reference. */
4444 if (TREE_CODE (type) == ARRAY_TYPE)
4447 if (AGGREGATE_TYPE_P (type))
4449 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4450 are passed by reference. */
/* exact_log2 yields 0..3 exactly for sizes 1, 2, 4, 8 bytes.  */
4451 int el2 = exact_log2 (int_size_in_bytes (type));
4452 return !(el2 >= 0 && el2 <= 3);
4456 /* __m128 is passed by reference. */
4457 /* ??? How to handle complex? For now treat them as structs,
4458 and pass them by reference if they're too large. */
4459 if (GET_MODE_SIZE (mode) > 8)
/* SysV x86-64: only variable-sized types go by reference.  */
4462 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4468 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4469 ABI. Only called if TARGET_SSE. */
/* Return true when TYPE is, or recursively contains, a vector that
   requires 128-bit alignment for 32-bit argument passing.  Walks
   record/union fields and array element types; per the comment above
   the function, this is only called when TARGET_SSE.  */
4471 contains_128bit_aligned_vector_p (tree type)
4473 enum machine_mode mode = TYPE_MODE (type);
/* An SSE-mode type itself qualifies, unless the user explicitly
   lowered its alignment below 128 bits.  */
4474 if (SSE_REG_MODE_P (mode)
4475 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* Under-aligned types cannot contribute a 128-bit requirement.  */
4477 if (TYPE_ALIGN (type) < 128)
4480 if (AGGREGATE_TYPE_P (type))
4482 /* Walk the aggregates recursively. */
4483 switch (TREE_CODE (type))
4487 case QUAL_UNION_TYPE:
4491 /* Walk all the structure fields. */
4492 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4494 if (TREE_CODE (field) == FIELD_DECL
4495 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4502 /* Just for use if some languages passes arrays by value. */
4503 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4514 /* Gives the alignment boundary, in bits, of an argument with the
4515 specified mode and type. */
/* Return the alignment boundary, in bits, for an argument of
   MODE/TYPE.  Starts from the type or mode alignment, never goes
   below PARM_BOUNDARY, and on ia32 demotes everything except genuine
   128-bit SSE data back to PARM_BOUNDARY.  */
4518 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4522 align = TYPE_ALIGN (type);
4524 align = GET_MODE_ALIGNMENT (mode);
4525 if (align < PARM_BOUNDARY)
4526 align = PARM_BOUNDARY;
4529 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4530 make an exception for SSE modes since these require 128bit
4533 The handling here differs from field_alignment. ICC aligns MMX
4534 arguments to 4 byte boundaries, while structure fields are aligned
4535 to 8 byte boundaries. */
4537 align = PARM_BOUNDARY;
/* Typeless case: only SSE register modes keep the wide alignment.  */
4540 if (!SSE_REG_MODE_P (mode))
4541 align = PARM_BOUNDARY;
/* Typed case: keep 128-bit alignment only if a 128-bit-aligned
   vector actually occurs somewhere inside the type.  */
4545 if (!contains_128bit_aligned_vector_p (type))
4546 align = PARM_BOUNDARY;
4554 /* Return true if N is a possible register number of function value. */
/* Return true if hard register REGNO may hold a function return
   value.  The x87 %st(0) is a return register only outside the MS x64
   ABI and only when floats return in the 80387; other cases (switch
   arms elided in this excerpt) depend on Mach-O / 64-bit targets.  */
4557 ix86_function_value_regno_p (int regno)
4564 case FIRST_FLOAT_REG:
4565 if (TARGET_64BIT_MS_ABI)
4567 return TARGET_FLOAT_RETURNS_IN_80387;
4573 if (TARGET_MACHO || TARGET_64BIT)
4581 /* Define how to find the value returned by a function.
4582 VALTYPE is the data type of the value (as a tree).
4583 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4584 otherwise, FUNC is 0. */
/* Compute the return-value register for the ia32 ABI: 8-byte vectors
   in %mm0, 16-byte vectors and TImode in %xmm0, x87 floats in %st(0),
   everything else in %eax — with an %xmm0 override for SF/DF when the
   function's sseregparm level requests it.  Returns a REG rtx in
   ORIG_MODE.  */
4587 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4588 const_tree fntype, const_tree fn)
4592 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4593 we normally prevent this case when mmx is not available. However
4594 some ABIs may require the result to be returned like DImode. */
4595 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4596 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4598 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4599 we prevent this case when sse is not available. However some ABIs
4600 may require the result to be returned like integer TImode. */
4601 else if (mode == TImode
4602 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4603 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4605 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4606 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4607 regno = FIRST_FLOAT_REG;
4609 /* Most things go in %eax. */
4612 /* Override FP return register with %xmm0 for local functions when
4613 SSE math is enabled or for functions with sseregparm attribute. */
4614 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4616 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4617 if ((sse_level >= 1 && mode == SFmode)
4618 || (sse_level == 2 && mode == DFmode))
4619 regno = FIRST_SSE_REG;
4622 return gen_rtx_REG (orig_mode, regno);
/* Compute the return-value location for the x86-64 SysV ABI.
   Libcalls (no type node) pick a register straight from the mode;
   typed values go through construct_container using the psABI return
   registers.  A NULL container (zero-sized struct) falls back to a
   dummy %rax REG to keep the rest of the compiler happy.  */
4626 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4631 /* Handle libcalls, which don't provide a type node. */
4632 if (valtype == NULL)
4644 return gen_rtx_REG (mode, FIRST_SSE_REG);
4647 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4651 return gen_rtx_REG (mode, AX_REG);
/* Typed return value: classify per the psABI.  */
4655 ret = construct_container (mode, orig_mode, valtype, 1,
4656 REGPARM_MAX, SSE_REGPARM_MAX,
4657 x86_64_int_return_registers, 0);
4659 /* For zero sized structures, construct_container returns NULL, but we
4660 need to keep rest of compiler happy by returning meaningful value. */
4662 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Compute the return-value register for the Microsoft x64 ABI:
   SF/DF and vector/16-byte values in %xmm0 (guard conditions partly
   elided in this excerpt), everything else in %rax.  */
4668 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4670 unsigned int regno = AX_REG;
4674 if (mode == SFmode || mode == DFmode)
4675 regno = FIRST_SSE_REG;
4676 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4677 regno = FIRST_SSE_REG;
4680 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   normalize FNTYPE_OR_DECL into a decl (FN) and a type (FNTYPE),
   then dispatch to the ABI-specific return-value routine.  */
4684 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4685 enum machine_mode orig_mode, enum machine_mode mode)
4687 const_tree fn, fntype;
/* FNTYPE_OR_DECL may be a FUNCTION_DECL, a type, or NULL.  */
4690 if (fntype_or_decl && DECL_P (fntype_or_decl))
4691 fn = fntype_or_decl;
4692 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4694 if (TARGET_64BIT_MS_ABI)
4695 return function_value_ms_64 (orig_mode, mode);
4696 else if (TARGET_64BIT)
4697 return function_value_64 (orig_mode, mode, valtype);
4699 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: return the rtx for a value of type
   VALTYPE returned by FNTYPE_OR_DECL.  Uses the type's natural mode
   for classification but keeps ORIG_MODE for the resulting REG.  */
4703 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4704 bool outgoing ATTRIBUTE_UNUSED)
4706 enum machine_mode mode, orig_mode;
4708 orig_mode = TYPE_MODE (valtype);
4709 mode = type_natural_mode (valtype);
4710 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE worker: like ix86_function_value but with no type
   information available, so MODE serves as both natural and original
   mode.  */
4714 ix86_libcall_value (enum machine_mode mode)
4716 return ix86_function_value_1 (NULL, NULL, mode, mode);
4719 /* Return true iff type is returned in memory. */
/* ia32: return nonzero if a value of TYPE/MODE must be returned in
   memory rather than registers.  Small MS-style aggregates, EAX-sized
   vectors, and MMX/SSE vectors (when the unit exists) return in
   registers; several branches are elided in this excerpt.  */
4722 return_in_memory_32 (const_tree type, enum machine_mode mode)
4726 if (mode == BLKmode)
4729 size = int_size_in_bytes (type);
/* MS-compatible targets return aggregates of <= 8 bytes in regs.  */
4731 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4734 if (VECTOR_MODE_P (mode) || mode == TImode)
4736 /* User-created vectors small enough to fit in EAX. */
4740 /* MMX/3dNow values are returned in MM0,
4741 except when it doesn't exits. */
4743 return (TARGET_MMX ? 0 : 1);
4745 /* SSE values are returned in XMM0, except when it doesn't exist. */
4747 return (TARGET_SSE ? 0 : 1);
/* x86-64 SysV: a value returns in memory exactly when the psABI
   classifier (examine_argument, in return position) says it cannot
   be placed in registers.  */
4762 return_in_memory_64 (const_tree type, enum machine_mode mode)
4764 int needed_intregs, needed_sseregs;
4765 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* MS x64: 16-byte non-complex vectors return in %xmm0; everything
   else returns in memory unless its size is exactly 1, 2, 4, or 8
   bytes (complex always in memory).  */
4769 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4771 HOST_WIDE_INT size = int_size_in_bytes (type);
4773 /* __m128 and friends are returned in xmm0. */
4774 if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
4777 /* Otherwise, the size must be exactly in [1248]. But not for complex. */
4778 return (size != 1 && size != 2 && size != 4 && size != 8)
4779 || COMPLEX_MODE_P (mode);
/* RETURN_IN_MEMORY dispatcher: classify TYPE by its natural mode and
   delegate to the appropriate ABI worker.  */
4783 ix86_return_in_memory (const_tree type)
4785 const enum machine_mode mode = type_natural_mode (type);
4787 if (TARGET_64BIT_MS_ABI)
4788 return return_in_memory_ms_64 (type, mode);
4789 else if (TARGET_64BIT)
4790 return return_in_memory_64 (type, mode);
4792 return return_in_memory_32 (type, mode);
4795 /* Return false iff TYPE is returned in memory. This version is used
4796 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4797 but differs notably in that when MMX is available, 8-byte vectors
4798 are returned in memory, rather than in MMX registers. */
/* Solaris 10 variant of ix86_return_in_memory (see comment above):
   differs from the generic version in that 8-byte vectors go to
   memory when MMX *is* available.  Several branches are elided in
   this excerpt.  */
4801 ix86_sol10_return_in_memory (const_tree type)
4804 enum machine_mode mode = type_natural_mode (type);
/* 64-bit targets share the generic psABI rule.  */
4807 return return_in_memory_64 (type, mode);
4809 if (mode == BLKmode)
4812 size = int_size_in_bytes (type);
4814 if (VECTOR_MODE_P (mode))
4816 /* Return in memory only if MMX registers *are* available. This
4817 seems backwards, but it is consistent with the existing
4824 else if (mode == TImode)
4826 else if (mode == XFmode)
4832 /* When returning SSE vector types, we have a choice of either
4833 (1) being abi incompatible with a -march switch, or
4834 (2) generating an error.
4835 Given no good solution, I think the safest thing is one warning.
4836 The user won't be able to use -Werror, but....
4838 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4839 called in response to actually generating a caller or callee that
4840 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4841 via aggregate_value_p for general type probing from tree-ssa. */
/* TARGET_STRUCT_VALUE_RTX hook; also the chosen place (see comment
   above) to emit a one-time warning when a function returns an SSE or
   MMX vector but the corresponding unit is disabled.  The warning
   fires at most once per kind per translation unit.  */
4844 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4846 static bool warnedsse, warnedmmx;
4848 if (!TARGET_64BIT && type)
4850 /* Look at the return type of the function, not the function type. */
4851 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4853 if (!TARGET_SSE && !warnedsse)
4856 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4859 warning (0, "SSE vector return without SSE enabled "
4864 if (!TARGET_MMX && !warnedmmx)
4866 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4869 warning (0, "MMX vector return without MMX enabled "
4879 /* Create the va_list data type. */
/* Build the tree type used for __builtin_va_list.  ia32 and MS x64
   use a plain char pointer; SysV x86-64 builds the four-field
   __va_list_tag record (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area) wrapped in a one-element array, as the psABI
   requires.  */
4882 ix86_build_builtin_va_list (void)
4884 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4886 /* For i386 we use plain pointer to argument area. */
4887 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4888 return build_pointer_type (char_type_node);
/* Build the record type and its four fields.  */
4890 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4891 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4893 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4894 unsigned_type_node);
4895 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4896 unsigned_type_node);
4897 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4899 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the va_list optimizations can find
   them later.  */
4902 va_list_gpr_counter_field = f_gpr;
4903 va_list_fpr_counter_field = f_fpr;
4905 DECL_FIELD_CONTEXT (f_gpr) = record;
4906 DECL_FIELD_CONTEXT (f_fpr) = record;
4907 DECL_FIELD_CONTEXT (f_ovf) = record;
4908 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields and attach the name/type decl.  */
4910 TREE_CHAIN (record) = type_decl;
4911 TYPE_NAME (record) = type_decl;
4912 TYPE_FIELDS (record) = f_gpr;
4913 TREE_CHAIN (f_gpr) = f_fpr;
4914 TREE_CHAIN (f_fpr) = f_ovf;
4915 TREE_CHAIN (f_ovf) = f_sav;
4917 layout_type (record);
4919 /* The correct type is an array type of one element. */
4920 return build_array_type (record, build_index_type (size_zero_node));
4923 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* Emit prologue code for a SysV x86-64 varargs function that dumps
   the unnamed-argument registers into the register save area: a plain
   store loop for the integer registers, and a computed jump through
   the sse_prologue_save pattern (skipping unused slots based on the
   incoming %al count) for the SSE registers.  NOTE(review): some
   declarations and loop headers are elided in this excerpt.  */
4926 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
/* Nothing to do if va_arg never touches either register class.  */
4936 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4939 /* Indicate to allocate space on the stack for varargs save area. */
4940 ix86_save_varrargs_registers = 1;
4941 /* We need 16-byte stack alignment to save SSE registers. If user
4942 asked for lower preferred_stack_boundary, lets just hope that he knows
4943 what he is doing and won't varargs SSE values.
4945 We also may end up assuming that only 64bit values are stored in SSE
4946 register let some floating point program work. */
4947 if (ix86_preferred_stack_boundary >= 128)
4948 cfun->stack_alignment_needed = 128;
4950 save_area = frame_pointer_rtx;
4951 set = get_varargs_alias_set ();
/* Save the remaining integer parameter registers, bounded by how much
   of the gp area va_arg actually reads.  */
4953 for (i = cum->regno;
4955 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4958 mem = gen_rtx_MEM (Pmode,
4959 plus_constant (save_area, i * UNITS_PER_WORD));
4960 MEM_NOTRAP_P (mem) = 1;
4961 set_mem_alias_set (mem, set);
4962 emit_move_insn (mem, gen_rtx_REG (Pmode,
4963 x86_64_int_parameter_registers[i]));
4966 if (cum->sse_nregs && cfun->va_list_fpr_size)
4968 /* Now emit code to save SSE registers. The AX parameter contains number
4969 of SSE parameter registers used to call this function. We use
4970 sse_prologue_save insn template that produces computed jump across
4971 SSE saves. We need some preparation work to get this working. */
4973 label = gen_label_rtx ();
4974 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4976 /* Compute address to jump to :
4977 label - 5*eax + nnamed_sse_arguments*5 */
4978 tmp_reg = gen_reg_rtx (Pmode);
4979 nsse_reg = gen_reg_rtx (Pmode);
/* %al holds the number of SSE registers actually used by the call.  */
4980 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
4981 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4982 gen_rtx_MULT (Pmode, nsse_reg,
4987 gen_rtx_CONST (DImode,
4988 gen_rtx_PLUS (DImode,
4990 GEN_INT (cum->sse_regno * 4))));
4992 emit_move_insn (nsse_reg, label_ref);
4993 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4995 /* Compute address of memory block we save into. We always use pointer
4996 pointing 127 bytes after first byte to store - this is needed to keep
4997 instruction size limited by 4 bytes. */
4998 tmp_reg = gen_reg_rtx (Pmode);
4999 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5000 plus_constant (save_area,
5001 8 * REGPARM_MAX + 127)));
5002 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5003 MEM_NOTRAP_P (mem) = 1;
5004 set_mem_alias_set (mem, set);
5005 set_mem_align (mem, BITS_PER_WORD);
5007 /* And finally do the dirty job! */
5008 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5009 GEN_INT (cum->sse_regno), label));
/* MS x64 varargs prologue: spill every remaining parameter register
   into its home slot in the caller-allocated shadow area (addressed
   off virtual_incoming_args_rtx), so va_arg can walk the stack
   uniformly.  */
5014 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5016 alias_set_type set = get_varargs_alias_set ();
5019 for (i = cum->regno; i < REGPARM_MAX; i++)
5023 mem = gen_rtx_MEM (Pmode,
5024 plus_constant (virtual_incoming_args_rtx,
5025 i * UNITS_PER_WORD));
5026 MEM_NOTRAP_P (mem) = 1;
5027 set_mem_alias_set (mem, set);
5029 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5030 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance a copy of CUM past the
   last named argument (stdarg only), then emit the register-save code
   via the ABI-specific worker.  NO_RTL must be false — suppressing
   RTL generation is not supported here.  */
5035 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5036 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5039 CUMULATIVE_ARGS next_cum;
5042 /* This argument doesn't appear to be used anymore. Which is good,
5043 because the old code here didn't suppress rtl generation. */
5044 gcc_assert (!no_rtl);
5049 fntype = TREE_TYPE (current_function_decl);
5051 /* For varargs, we do not want to skip the dummy va_dcl argument.
5052 For stdargs, we do want to skip the last named argument. */
5054 if (stdarg_p (fntype))
5055 function_arg_advance (&next_cum, mode, type, 1);
5057 if (TARGET_64BIT_MS_ABI)
5058 setup_incoming_varargs_ms_64 (&next_cum);
5060 setup_incoming_varargs_64 (&next_cum);
5063 /* Implement va_start. */
/* Expand __builtin_va_start.  ia32 and MS x64 use the standard
   pointer-bump expansion; SysV x86-64 initializes the four
   __va_list_tag fields: gp_offset/fp_offset from the number of
   registers already consumed by named args, overflow_arg_area past
   the named stack words, and reg_save_area at the frame pointer.  */
5066 ix86_va_start (tree valist, rtx nextarg)
5068 HOST_WIDE_INT words, n_gpr, n_fpr;
5069 tree f_gpr, f_fpr, f_ovf, f_sav;
5070 tree gpr, fpr, ovf, sav, t;
5073 /* Only 64bit target needs something special. */
5074 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5076 std_expand_builtin_va_start (valist, nextarg);
/* Dig the four fields out of the va_list record type.  */
5080 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5081 f_fpr = TREE_CHAIN (f_gpr);
5082 f_ovf = TREE_CHAIN (f_fpr);
5083 f_sav = TREE_CHAIN (f_ovf);
5085 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5086 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5087 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5088 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5089 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5091 /* Count number of gp and fp argument registers used. */
5092 words = current_function_args_info.words;
5093 n_gpr = current_function_args_info.regno;
5094 n_fpr = current_function_args_info.sse_regno;
/* gp_offset = 8 bytes per integer register already used.  */
5096 if (cfun->va_list_gpr_size)
5098 type = TREE_TYPE (gpr);
5099 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5100 build_int_cst (type, n_gpr * 8));
5101 TREE_SIDE_EFFECTS (t) = 1;
5102 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = 16 bytes per SSE register, past the gp save slots.  */
5105 if (cfun->va_list_fpr_size)
5107 type = TREE_TYPE (fpr);
5108 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5109 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5110 TREE_SIDE_EFFECTS (t) = 1;
5111 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5114 /* Find the overflow area. */
5115 type = TREE_TYPE (ovf);
5116 t = make_tree (type, virtual_incoming_args_rtx);
5118 t = build2 (POINTER_PLUS_EXPR, type, t,
5119 size_int (words * UNITS_PER_WORD));
5120 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5121 TREE_SIDE_EFFECTS (t) = 1;
5122 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5124 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5126 /* Find the register save area.
5127 Prologue of the function save it right above stack frame. */
5128 type = TREE_TYPE (sav);
5129 t = make_tree (type, frame_pointer_rtx);
5130 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5131 TREE_SIDE_EFFECTS (t) = 1;
5132 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5136 /* Implement va_arg. */
/* Gimplify a va_arg expression for TYPE.  ia32 and MS x64 use the
   standard expansion; SysV x86-64 emits the psABI sequence: check
   whether the value still fits in the saved registers (gp_offset /
   fp_offset bounds), fetch it from the register save area if so —
   piecewise through a temporary when the pieces are not contiguous —
   otherwise take it from the overflow area with on-stack alignment.
   NOTE(review): this excerpt elides many intervening lines (braces,
   else-arms, declarations).  */
5139 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
/* Identity map: register slot i is integer parameter register i.  */
5141 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5142 tree f_gpr, f_fpr, f_ovf, f_sav;
5143 tree gpr, fpr, ovf, sav, t;
5145 tree lab_false, lab_over = NULL_TREE;
5150 enum machine_mode nat_mode;
5152 /* Only 64bit target needs something special. */
5153 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5154 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Pull the four va_list fields out of the record.  */
5156 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5157 f_fpr = TREE_CHAIN (f_gpr);
5158 f_ovf = TREE_CHAIN (f_fpr);
5159 f_sav = TREE_CHAIN (f_ovf);
5161 valist = build_va_arg_indirect_ref (valist);
5162 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5163 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5164 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5165 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference values are fetched as a pointer and dereferenced
   at the end.  */
5167 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5169 type = build_pointer_type (type);
5170 size = int_size_in_bytes (type);
5171 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Classify TYPE exactly as argument passing would.  */
5173 nat_mode = type_natural_mode (type);
5174 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5175 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5177 /* Pull the value out of the saved registers. */
5179 addr = create_tmp_var (ptr_type_node, "addr");
5180 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5184 int needed_intregs, needed_sseregs;
5186 tree int_addr, sse_addr;
5188 lab_false = create_artificial_label ();
5189 lab_over = create_artificial_label ();
5191 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* Over-aligned multi-register values must be assembled in a temp.  */
5193 need_temp = (!REG_P (container)
5194 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5195 || TYPE_ALIGN (type) > 128));
5197 /* In case we are passing structure, verify that it is consecutive block
5198 on the register save area. If not we need to do moves. */
5199 if (!need_temp && !REG_P (container))
5201 /* Verify that all registers are strictly consecutive */
5202 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE pieces must sit at consecutive 16-byte slots.  */
5206 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5208 rtx slot = XVECEXP (container, 0, i);
5209 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5210 || INTVAL (XEXP (slot, 1)) != i * 16)
/* Integer pieces must sit at consecutive 8-byte slots.  */
5218 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5220 rtx slot = XVECEXP (container, 0, i);
5221 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5222 || INTVAL (XEXP (slot, 1)) != i * 8)
5234 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5235 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5236 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5237 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5240 /* First ensure that we fit completely in registers. */
/* if (gpr >= limit) goto lab_false;  -- not enough gp regs left.  */
5243 t = build_int_cst (TREE_TYPE (gpr),
5244 (REGPARM_MAX - needed_intregs + 1) * 8);
5245 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5246 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5247 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5248 gimplify_and_add (t, pre_p);
/* Same bound check for the SSE register area.  */
5252 t = build_int_cst (TREE_TYPE (fpr),
5253 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5255 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5256 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5257 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5258 gimplify_and_add (t, pre_p);
5261 /* Compute index to start of area used for integer regs. */
5264 /* int_addr = gpr + sav; */
5265 t = fold_convert (sizetype, gpr);
5266 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5267 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5268 gimplify_and_add (t, pre_p);
5272 /* sse_addr = fpr + sav; */
5273 t = fold_convert (sizetype, fpr);
5274 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5275 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5276 gimplify_and_add (t, pre_p);
/* Non-contiguous pieces: copy each register slot into a stack
   temporary and return that temporary's address.  */
5281 tree temp = create_tmp_var (type, "va_arg_tmp");
5284 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5285 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5286 gimplify_and_add (t, pre_p);
5288 for (i = 0; i < XVECLEN (container, 0); i++)
5290 rtx slot = XVECEXP (container, 0, i);
5291 rtx reg = XEXP (slot, 0);
5292 enum machine_mode mode = GET_MODE (reg);
5293 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5294 tree addr_type = build_pointer_type (piece_type);
5297 tree dest_addr, dest;
/* Source slot: SSE area (16-byte slots) or gp area (8-byte slots).  */
5299 if (SSE_REGNO_P (REGNO (reg)))
5301 src_addr = sse_addr;
5302 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5306 src_addr = int_addr;
5307 src_offset = REGNO (reg) * 8;
5309 src_addr = fold_convert (addr_type, src_addr);
5310 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5311 size_int (src_offset));
5312 src = build_va_arg_indirect_ref (src_addr);
/* Destination offset inside the temp mirrors the PARALLEL slot.  */
5314 dest_addr = fold_convert (addr_type, addr);
5315 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5316 size_int (INTVAL (XEXP (slot, 1))));
5317 dest = build_va_arg_indirect_ref (dest_addr);
5319 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5320 gimplify_and_add (t, pre_p);
/* Bump the consumed-register counters past this value.  */
5326 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5327 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5328 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5329 gimplify_and_add (t, pre_p);
5333 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5334 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5335 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5336 gimplify_and_add (t, pre_p);
5339 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5340 gimplify_and_add (t, pre_p);
5342 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5343 append_to_statement_list (t, pre_p);
5346 /* ... otherwise out of the overflow area. */
5348 /* Care for on-stack alignment if needed. */
5349 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5350 || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's boundary: t = (ovf+align-1) & -align.  */
5354 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5355 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5356 size_int (align - 1));
5357 t = fold_convert (sizetype, t);
5358 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5360 t = fold_convert (TREE_TYPE (ovf), t);
5362 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5364 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5365 gimplify_and_add (t2, pre_p);
/* Advance ovf past the value just consumed.  */
5367 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5368 size_int (rsize * UNITS_PER_WORD));
5369 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5370 gimplify_and_add (t, pre_p);
5374 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5375 append_to_statement_list (t, pre_p);
5378 ptrtype = build_pointer_type (type);
5379 addr = fold_convert (ptrtype, addr);
/* Extra dereference for pass-by-reference values.  */
5382 addr = build_va_arg_indirect_ref (addr);
5383 return build_va_arg_indirect_ref (addr);
5386 /* Return nonzero if OPNUM's MEM should be matched
5387 in movabs* patterns. */
/* Return nonzero if operand OPNUM of INSN is a MEM suitable for a
   movabs* pattern: the operand (looked up inside the insn's SET,
   unwrapping PARALLEL and SUBREGs) must not be volatile unless
   volatile memory is currently allowed.  */
5390 ix86_check_movabs (rtx insn, int opnum)
5394 set = PATTERN (insn);
5395 if (GET_CODE (set) == PARALLEL)
5396 set = XVECEXP (set, 0, 0);
5397 gcc_assert (GET_CODE (set) == SET);
5398 mem = XEXP (set, opnum);
/* Peel SUBREG wrappers to reach the underlying MEM.  */
5399 while (GET_CODE (mem) == SUBREG)
5400 mem = SUBREG_REG (mem);
5401 gcc_assert (MEM_P (mem));
5402 return (volatile_ok || !MEM_VOLATILE_P (mem));
5405 /* Initialize the table of extra 80387 mathematical constants. */
/* Populate ext_80387_constants_table with the five constants that
   dedicated x87 load instructions provide (fldlg2, fldln2, fldl2e,
   fldl2t, fldpi), each rounded to XFmode, and mark the table
   initialized.  */
5408 init_ext_80387_constants (void)
5410 static const char * cst[5] =
5412 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5413 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5414 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5415 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5416 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5420 for (i = 0; i < 5; i++)
5422 real_from_string (&ext_80387_constants_table[i], cst[i]);
5423 /* Ensure each constant is rounded to XFmode precision. */
5424 real_convert (&ext_80387_constants_table[i],
5425 XFmode, &ext_80387_constants_table[i]);
5428 ext_80387_constants_init = 1;
5431 /* Return true if the constant is something that can be loaded with
5432 a special instruction. */
5435 standard_80387_constant_p (rtx x)
5437 enum machine_mode mode = GET_MODE (x);
/* Only CONST_DOUBLEs in an x87 floating mode are candidates.  */
5441 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
/* 0.0 and 1.0 map to fldz / fld1.  */
5444 if (x == CONST0_RTX (mode))
5446 if (x == CONST1_RTX (mode))
5449 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5451 /* For XFmode constants, try to find a special 80387 instruction when
5452 optimizing for size or on those CPUs that benefit from them. */
5454 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
/* Lazily build the extended-constant table before searching it.  */
5458 if (! ext_80387_constants_init)
5459 init_ext_80387_constants ();
5461 for (i = 0; i < 5; i++)
5462 if (real_identical (&r, &ext_80387_constants_table[i]))
5466 /* Load of the constant -0.0 or -1.0 will be split as
5467 fldz;fchs or fld1;fchs sequence. */
5468 if (real_isnegzero (&r))
5470 if (real_identical (&r, &dconstm1))
5476 /* Return the opcode of the special instruction to be used to load
5480 standard_80387_constant_opcode (rtx x)
/* Dispatch on the index returned by standard_80387_constant_p; each case
   presumably yields the matching fld* mnemonic string — NOTE(review):
   switch cases are not visible here, verify against full source.  */
5482 switch (standard_80387_constant_p (x))
5506 /* Return the CONST_DOUBLE representing the 80387 constant that is
5507 loaded by the specified special instruction. The argument IDX
5508 matches the return value from standard_80387_constant_p. */
5511 standard_80387_constant_rtx (int idx)
/* Make sure the table of special x87 constants has been filled in.  */
5515 if (! ext_80387_constants_init)
5516 init_ext_80387_constants ();
/* Return the table entry as a CONST_DOUBLE — NOTE(review): the mapping
   from IDX to table index i is not visible here; confirm in full source.  */
5532 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5536 /* Return 1 if mode is a valid mode for sse. */
5538 standard_sse_mode_p (enum machine_mode mode)
5555 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5558 standard_sse_constant_p (rtx x)
5560 enum machine_mode mode = GET_MODE (x);
/* All-zeros constant: loadable with a single xor/pxor.  */
5562 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
/* All-ones vector: loadable with pcmpeqd on SSE2; -1 signals that it is
   recognized but not loadable without SSE2.  */
5564 if (vector_all_ones_operand (x, mode)
5565 && standard_sse_mode_p (mode))
5566 return TARGET_SSE2 ? 2 : -1;
5571 /* Return the opcode of the special instruction to be used to load
5575 standard_sse_constant_opcode (rtx insn, rtx x)
/* Dispatch on the classification from standard_sse_constant_p:
   case 1 (all zeros) picks an xor flavor by insn mode, case 2 (all ones)
   uses pcmpeqd.  */
5577 switch (standard_sse_constant_p (x))
5580 if (get_attr_mode (insn) == MODE_V4SF)
5581 return "xorps\t%0, %0";
5582 else if (get_attr_mode (insn) == MODE_V2DF)
5583 return "xorpd\t%0, %0";
5585 return "pxor\t%0, %0";
5587 return "pcmpeqd\t%0, %0";
5592 /* Returns 1 if OP contains a symbol reference */
5595 symbolic_reference_mentioned_p (rtx op)
/* A SYMBOL_REF or LABEL_REF is itself a symbolic reference.  */
5600 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Otherwise walk every sub-rtx recursively, using the RTL format string
   to distinguish vector ('E') from expression ('e') operands.  */
5603 fmt = GET_RTX_FORMAT (GET_CODE (op));
5604 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5610 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5611 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5615 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5622 /* Return 1 if it is appropriate to emit `ret' instructions in the
5623 body of a function. Do this only if the epilogue is simple, needing a
5624 couple of insns. Prior to reloading, we can't tell how many registers
5625 must be saved, so return 0 then. Return 0 if there is no frame
5626 marker to de-allocate. */
5629 ix86_can_use_return_insn_p (void)
5631 struct ix86_frame frame;
/* Before reload we cannot know how many registers need saving, and with a
   frame pointer the epilogue must tear the frame down first.  */
5633 if (! reload_completed || frame_pointer_needed)
5636 /* Don't allow more than 32 pop, since that's all we can do
5637 with one instruction. */
5638 if (current_function_pops_args
5639 && current_function_args_size >= 32768)
/* A bare `ret' is only valid when nothing needs deallocating or restoring.  */
5642 ix86_compute_frame_layout (&frame);
5643 return frame.to_allocate == 0 && frame.nregs == 0;
5646 /* Value should be nonzero if functions must have frame pointers.
5647 Zero means the frame pointer need not be set up (and parms may
5648 be accessed via the stack pointer) in functions that seem suitable. */
5651 ix86_frame_pointer_required (void)
5653 /* If we accessed previous frames, then the generated code expects
5654 to be able to access the saved ebp value in our frame. */
5655 if (cfun->machine->accesses_prev_frame)
5658 /* Several x86 os'es need a frame pointer for other reasons,
5659 usually pertaining to setjmp. */
5660 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5663 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5664 the frame pointer by default. Turn it back on now if we've not
5665 got a leaf function. */
5666 if (TARGET_OMIT_LEAF_FRAME_POINTER
5667 && (!current_function_is_leaf
5668 || ix86_current_function_calls_tls_descriptor))
/* Profiling also forces a frame pointer.  */
5671 if (current_function_profile)
5677 /* Record that the current function accesses previous call frames. */
5680 ix86_setup_frame_addresses (void)
/* Flag checked by ix86_frame_pointer_required: once set, this function
   must keep a frame pointer so the saved ebp chain stays walkable.  */
5682 cfun->machine->accesses_prev_frame = 1;
5685 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5686 # define USE_HIDDEN_LINKONCE 1
5688 # define USE_HIDDEN_LINKONCE 0
5691 static int pic_labels_used;
5693 /* Fills in the label name that should be used for a pc thunk for
5694 the given register. */
5697 get_pc_thunk_name (char name[32], unsigned int regno)
/* PC thunks are an IA-32 PIC mechanism only; x86_64 uses RIP-relative
   addressing instead.  */
5699 gcc_assert (!TARGET_64BIT);
/* With linkonce support, use the well-known shared name so thunks from
   different objects can be merged; otherwise use a per-file local label.  */
5701 if (USE_HIDDEN_LINKONCE)
5702 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5704 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5708 /* This function generates code for -fpic that loads %ebx with
5709 the return address of the caller and then returns. */
5712 ix86_file_end (void)
/* Emit one get-pc thunk for every GP register that output_set_got
   recorded in the pic_labels_used bitmask.  */
5717 for (regno = 0; regno < 8; ++regno)
5721 if (! ((pic_labels_used >> regno) & 1))
5724 get_pc_thunk_name (name, regno);
/* Darwin path: coalesced text section with weak definition.  */
5729 switch_to_section (darwin_sections[text_coal_section]);
5730 fputs ("\t.weak_definition\t", asm_out_file);
5731 assemble_name (asm_out_file, name);
5732 fputs ("\n\t.private_extern\t", asm_out_file);
5733 assemble_name (asm_out_file, name);
5734 fputs ("\n", asm_out_file);
5735 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF path: emit the thunk as a hidden, one-only (COMDAT) function so
   identical thunks from different objects collapse to one copy.  */
5739 if (USE_HIDDEN_LINKONCE)
5743 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5745 TREE_PUBLIC (decl) = 1;
5746 TREE_STATIC (decl) = 1;
5747 DECL_ONE_ONLY (decl) = 1;
5749 (*targetm.asm_out.unique_section) (decl, 0);
5750 switch_to_section (get_named_section (decl, NULL, 0));
5752 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5753 fputs ("\t.hidden\t", asm_out_file);
5754 assemble_name (asm_out_file, name);
5755 fputc ('\n', asm_out_file);
5756 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: a plain label in the regular text section.  */
5760 switch_to_section (text_section);
5761 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack) into the
   target register, then return.  */
5764 xops[0] = gen_rtx_REG (SImode, regno);
5765 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5766 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5767 output_asm_insn ("ret", xops);
/* Mark the stack non-executable where the target requires the marker.  */
5770 if (NEED_INDICATE_EXEC_STACK)
5771 file_end_indicate_exec_stack ();
5774 /* Emit code for the SET_GOT patterns. */
5777 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
/* VxWorks RTP: the GOT address lives in a kernel-provided table rather
   than being computed pc-relatively.  */
5783 if (TARGET_VXWORKS_RTP && flag_pic)
5785 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5786 xops[2] = gen_rtx_MEM (Pmode,
5787 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5788 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5790 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5791 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5792 an unadorned address. */
5793 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5794 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5795 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5799 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or without PIC) use the classic
   call/pop sequence to materialize the PC.  */
5801 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5803 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ())
5806 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5808 output_asm_insn ("call\t%a2", xops);
5811 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5812 is what will be referenced by the Mach-O PIC subsystem. */
5814 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5817 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5818 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* The pop after the call leaves the return address (== pc) in DEST.  */
5821 output_asm_insn ("pop{l}\t%0", xops);
/* Deep-branch-prediction path: call a shared get-pc thunk instead, which
   keeps the CPU's call/return stack balanced; remember which register's
   thunk was used so ix86_file_end emits it.  */
5826 get_pc_thunk_name (name, REGNO (dest));
5827 pic_labels_used |= 1 << REGNO (dest);
5829 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5830 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5831 output_asm_insn ("call\t%X2", xops);
5832 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5833 is what will be referenced by the Mach-O PIC subsystem. */
5836 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5838 targetm.asm_out.internal_label (asm_out_file, "L",
5839 CODE_LABEL_NUMBER (label));
/* Finally add _GLOBAL_OFFSET_TABLE_ (pc-relative when needed) to DEST.  */
5846 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5847 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5849 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5854 /* Generate an "push" pattern for input ARG. */
/* Body of gen_push (header elided in this view): build a SET whose
   destination is a MEM through a pre-decremented stack pointer, i.e. the
   RTL form of a push of ARG.  */
5859 return gen_rtx_SET (VOIDmode,
5861 gen_rtx_PRE_DEC (Pmode,
5862 stack_pointer_rtx)),
5866 /* Return >= 0 if there is an unused call-clobbered register available
5867 for the entire function. */
5870 ix86_select_alt_pic_regnum (void)
/* Only leaf, non-profiled functions (with no TLS-descriptor calls) can
   steal a call-clobbered register for the whole function.  */
5872 if (current_function_is_leaf && !current_function_profile
5873 && !ix86_current_function_calls_tls_descriptor)
/* Scan eax/edx/ecx (regnos 2..0) for one that is never live.  */
5876 for (i = 2; i >= 0; --i)
5877 if (!df_regs_ever_live_p (i))
/* Nothing free: caller must use the real PIC register.  */
5881 return INVALID_REGNUM;
5884 /* Return 1 if we need to save REGNO. */
5886 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register needs saving when the function actually uses the GOT
   (directly, via profiling, eh_return, or the constant pool) ... */
5888 if (pic_offset_table_rtx
5889 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5890 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5891 || current_function_profile
5892 || current_function_calls_eh_return
5893 || current_function_uses_const_pool))
/* ... unless an unused call-clobbered register can stand in for it.  */
5895 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* For eh_return, the EH data registers must be treated as saved on the
   normal path so the landing pad can find them.  */
5900 if (current_function_calls_eh_return && maybe_eh_return)
5905 unsigned test = EH_RETURN_DATA_REGNO (i);
5906 if (test == INVALID_REGNUM)
/* The register holding the forced-aligned argument pointer must survive.  */
5913 if (cfun->machine->force_align_arg_pointer
5914 && regno == REGNO (cfun->machine->force_align_arg_pointer))
/* Default rule: save live call-saved, non-fixed registers; ebp is
   excluded when it is already saved as the frame pointer.  */
5917 return (df_regs_ever_live_p (regno)
5918 && !call_used_regs[regno]
5919 && !fixed_regs[regno]
5920 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5923 /* Return number of registers to be saved on the stack. */
5926 ix86_nsaved_regs (void)
/* Count every hard register that ix86_save_reg says must be saved
   (including EH data registers, hence the `true' argument).  */
5931 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5932 if (ix86_save_reg (regno, true))
5937 /* Return the offset between two registers, one to be eliminated, and the other
5938 its replacement, at the start of a routine. */
5941 ix86_initial_elimination_offset (int from, int to)
5943 struct ix86_frame frame;
5944 ix86_compute_frame_layout (&frame);
/* arg pointer -> hard frame pointer.  */
5946 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5947 return frame.hard_frame_pointer_offset;
/* soft frame pointer -> hard frame pointer.  */
5948 else if (from == FRAME_POINTER_REGNUM
5949 && to == HARD_FRAME_POINTER_REGNUM)
5950 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* The only remaining legal target is the stack pointer.  */
5953 gcc_assert (to == STACK_POINTER_REGNUM);
5955 if (from == ARG_POINTER_REGNUM)
5956 return frame.stack_pointer_offset;
5958 gcc_assert (from == FRAME_POINTER_REGNUM);
5959 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5963 /* Fill structure ix86_frame about frame of currently computed function. */
5966 ix86_compute_frame_layout (struct ix86_frame *frame)
5968 HOST_WIDE_INT total_size;
5969 unsigned int stack_alignment_needed;
5970 HOST_WIDE_INT offset;
5971 unsigned int preferred_alignment;
5972 HOST_WIDE_INT size = get_frame_size ();
5974 frame->nregs = ix86_nsaved_regs ();
/* Alignment values are kept in bits elsewhere; convert to bytes here.  */
5977 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5978 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5980 /* During reload iteration the amount of registers saved can change.
5981 Recompute the value as needed. Do not recompute when amount of registers
5982 didn't change as reload does multiple calls to the function and does not
5983 expect the decision to change within single iteration. */
5985 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5987 int count = frame->nregs;
5989 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5990 /* The fast prologue uses move instead of push to save registers. This
5991 is significantly longer, but also executes faster as modern hardware
5992 can execute the moves in parallel, but can't do that for push/pop.
5994 Be careful about choosing what prologue to emit: When function takes
5995 many instructions to execute we may use slow version as well as in
5996 case function is known to be outside hot spot (this is known with
5997 feedback only). Weight the size of function by number of registers
5998 to save as it is cheap to use one or two push instructions but very
5999 slow to use many of them. */
6001 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6002 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6003 || (flag_branch_probabilities
6004 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6005 cfun->machine->use_fast_prologue_epilogue = false;
6007 cfun->machine->use_fast_prologue_epilogue
6008 = !expensive_function_p (count);
/* Save via moves only when the target prefers it and the fast prologue
   was chosen above.  */
6010 if (TARGET_PROLOGUE_USING_MOVE
6011 && cfun->machine->use_fast_prologue_epilogue)
6012 frame->save_regs_using_mov = true;
6014 frame->save_regs_using_mov = false;
6017 /* Skip return address and saved base pointer. */
6018 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6020 frame->hard_frame_pointer_offset = offset;
6022 /* Do some sanity checking of stack_alignment_needed and
6023 preferred_alignment, since i386 port is the only using those features
6024 that may break easily. */
6026 gcc_assert (!size || stack_alignment_needed);
6027 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6028 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6029 gcc_assert (stack_alignment_needed
6030 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6032 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6033 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6035 /* Register save area */
6036 offset += frame->nregs * UNITS_PER_WORD;
/* Varargs register save area (x86-64 register-passing spill slots).  */
6039 if (ix86_save_varrargs_registers)
6041 offset += X86_64_VARARGS_SIZE;
6042 frame->va_arg_size = X86_64_VARARGS_SIZE;
6045 frame->va_arg_size = 0;
6047 /* Align start of frame for local function. */
6048 frame->padding1 = ((offset + stack_alignment_needed - 1)
6049 & -stack_alignment_needed) - offset;
6051 offset += frame->padding1;
6053 /* Frame pointer points here. */
6054 frame->frame_pointer_offset = offset;
6058 /* Add outgoing arguments area. Can be skipped if we eliminated
6059 all the function calls as dead code.
6060 Skipping is however impossible when function calls alloca. Alloca
6061 expander assumes that last current_function_outgoing_args_size
6062 of stack frame are unused. */
6063 if (ACCUMULATE_OUTGOING_ARGS
6064 && (!current_function_is_leaf || current_function_calls_alloca
6065 || ix86_current_function_calls_tls_descriptor))
6067 offset += current_function_outgoing_args_size;
6068 frame->outgoing_arguments_size = current_function_outgoing_args_size;
6071 frame->outgoing_arguments_size = 0;
6073 /* Align stack boundary. Only needed if we're calling another function
6075 if (!current_function_is_leaf || current_function_calls_alloca
6076 || ix86_current_function_calls_tls_descriptor)
6077 frame->padding2 = ((offset + preferred_alignment - 1)
6078 & -preferred_alignment) - offset;
6080 frame->padding2 = 0;
6082 offset += frame->padding2;
6084 /* We've reached end of stack frame. */
6085 frame->stack_pointer_offset = offset;
6087 /* Size prologue needs to allocate. */
6088 frame->to_allocate =
6089 (size + frame->padding1 + frame->padding2
6090 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny frames are cheaper with push/pop; huge 64-bit frames cannot use
   32-bit displacements, so fall back from moves in both cases.  */
6092 if ((!frame->to_allocate && frame->nregs <= 1)
6093 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6094 frame->save_regs_using_mov = false;
/* A leaf function that never moves sp may place its whole frame in the
   red zone below sp and skip the allocation entirely.  */
6096 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6097 && current_function_is_leaf
6098 && !ix86_current_function_calls_tls_descriptor)
6100 frame->red_zone_size = frame->to_allocate;
6101 if (frame->save_regs_using_mov)
6102 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6103 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6104 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6107 frame->red_zone_size = 0;
6108 frame->to_allocate -= frame->red_zone_size;
6109 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably under an #if/flag guard
   elided from this view — NOTE(review): confirm against full source).  */
6111 fprintf (stderr, "\n");
6112 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6113 fprintf (stderr, "size: %ld\n", (long)size);
6114 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6115 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6116 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6117 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6118 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6119 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6120 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6121 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6122 (long)frame->hard_frame_pointer_offset);
6123 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6124 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6125 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6126 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6130 /* Emit code to save registers in the prologue. */
6133 ix86_emit_save_regs (void)
/* Push every register that must be saved, highest regno first, and mark
   each push frame-related so dwarf2out records it.  */
6138 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6139 if (ix86_save_reg (regno, true))
6141 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6142 RTX_FRAME_RELATED_P (insn) = 1;
6146 /* Emit code to save registers using MOV insns. First register
6147 is restored from POINTER + OFFSET. */
6149 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
/* Store each to-be-saved register at successive word slots starting at
   POINTER + OFFSET; ascending regno order here, unlike the push variant.  */
6154 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6155 if (ix86_save_reg (regno, true))
6157 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6159 gen_rtx_REG (Pmode, regno));
/* Mark the store so the unwinder knows where the register lives.  */
6160 RTX_FRAME_RELATED_P (insn) = 1;
6161 offset += UNITS_PER_WORD;
6165 /* Expand prologue or epilogue stack adjustment.
6166 The pattern exist to put a dependency on all ebp-based memory accesses.
6167 STYLE should be negative if instructions should be marked as frame related,
6168 zero if %r11 register is live and cannot be freely used and positive
6172 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
/* 32-bit (or small-offset) case: single add/lea via the _1 pattern.  */
6177 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
/* 64-bit with an offset that fits a sign-extended imm32.  */
6178 else if (x86_64_immediate_operand (offset, DImode))
6179 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6183 /* r11 is used by indirect sibcall return as well, set before the
6184 epilogue and used after the epilogue. ATM indirect sibcall
6185 shouldn't be used together with huge frame sizes in one
6186 function because of the frame_size check in sibcall.c. */
/* Huge 64-bit offset: load it into r11 first, then adjust through r11.  */
6188 r11 = gen_rtx_REG (DImode, R11_REG);
6189 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6191 RTX_FRAME_RELATED_P (insn) = 1;
6192 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
/* STYLE < 0 requests frame-related marking (see function comment above).  */
6196 RTX_FRAME_RELATED_P (insn) = 1;
6199 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6202 ix86_internal_arg_pointer (void)
/* Did the user request realignment via the force_align_arg_pointer
   attribute on this function's type?  */
6204 bool has_force_align_arg_pointer =
6205 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6206 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
/* Realign for: file-scope main() (when the target wants it), the global
   -mstackrealign switch, or the per-function attribute.  */
6207 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6208 && DECL_NAME (current_function_decl)
6209 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6210 && DECL_FILE_SCOPE_P (current_function_decl))
6211 || ix86_force_align_arg_pointer
6212 || has_force_align_arg_pointer)
6214 /* Nested functions can't realign the stack due to a register
6216 if (DECL_CONTEXT (current_function_decl)
6217 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6219 if (ix86_force_align_arg_pointer)
6220 warning (0, "-mstackrealign ignored for nested functions")
6221 if (has_force_align_arg_pointer)
6222 error ("%s not supported for nested functions",
6223 ix86_force_align_arg_pointer_string);
6224 return virtual_incoming_args_rtx;
/* Dedicate ecx as the fake argument pointer; the prologue will align
   the real stack pointer around it.  */
6226 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6227 return copy_to_reg (cfun->machine->force_align_arg_pointer);
/* No realignment: the standard virtual incoming-args pointer.  */
6230 return virtual_incoming_args_rtx;
6233 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6234 This is called from dwarf2out.c to emit call frame instructions
6235 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6237 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6239 rtx unspec = SET_SRC (pattern);
6240 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Translate the two UNSPECs emitted by the stack-realignment prologue
   into the corresponding dwarf2 CFI directives.  */
6244 case UNSPEC_REG_SAVE:
6245 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6246 SET_DEST (pattern));
6248 case UNSPEC_DEF_CFA:
6249 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6250 INTVAL (XVECEXP (unspec, 0, 0)));
6257 /* Expand the prologue into a bunch of separate insns. */
6260 ix86_expand_prologue (void)
6264 struct ix86_frame frame;
6265 HOST_WIDE_INT allocate;
6267 ix86_compute_frame_layout (&frame);
/* Stack-realignment preamble: copy the incoming argument pointer into a
   register, align sp, and re-push the return address so unwind info
   stays consistent.  */
6269 if (cfun->machine->force_align_arg_pointer)
6273 /* Grab the argument pointer. */
6274 x = plus_constant (stack_pointer_rtx, 4);
6275 y = cfun->machine->force_align_arg_pointer;
6276 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6277 RTX_FRAME_RELATED_P (insn) = 1;
6279 /* The unwind info consists of two parts: install the fafp as the cfa,
6280 and record the fafp as the "save register" of the stack pointer.
6281 The later is there in order that the unwinder can see where it
6282 should restore the stack pointer across the and insn. */
6283 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6284 x = gen_rtx_SET (VOIDmode, y, x);
6285 RTX_FRAME_RELATED_P (x) = 1;
6286 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6288 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6289 RTX_FRAME_RELATED_P (y) = 1;
6290 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6291 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6292 REG_NOTES (insn) = x;
6294 /* Align the stack. */
6295 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6298 /* And here we cheat like madmen with the unwind info. We force the
6299 cfa register back to sp+4, which is exactly what it was at the
6300 start of the function. Re-pushing the return address results in
6301 the return at the same spot relative to the cfa, and thus is
6302 correct wrt the unwind info. */
6303 x = cfun->machine->force_align_arg_pointer;
6304 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6305 insn = emit_insn (gen_push (x));
6306 RTX_FRAME_RELATED_P (insn) = 1;
6309 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6310 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6311 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6312 REG_NOTES (insn) = x;
6315 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6316 slower on all targets. Also sdb doesn't like it. */
/* Standard push %ebp; mov %esp, %ebp frame setup.  */
6318 if (frame_pointer_needed)
6320 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6321 RTX_FRAME_RELATED_P (insn) = 1;
6323 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6324 RTX_FRAME_RELATED_P (insn) = 1;
6327 allocate = frame.to_allocate;
/* Push-based register save; otherwise the save area is added to the
   single stack allocation below.  */
6329 if (!frame.save_regs_using_mov)
6330 ix86_emit_save_regs ();
6332 allocate += frame.nregs * UNITS_PER_WORD;
6334 /* When using red zone we may start register saving before allocating
6335 the stack frame saving one cycle of the prologue. However I will
6336 avoid doing this if I am going to have to probe the stack since
6337 at least on x86_64 the stack probe can turn into a call that clobbers
6338 a red zone location */
6339 if (TARGET_RED_ZONE && frame.save_regs_using_mov
6340 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6341 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6342 : stack_pointer_rtx,
6343 -frame.nregs * UNITS_PER_WORD);
/* Small allocation (or no probing required): plain sp adjustment.  */
6347 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6348 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6349 GEN_INT (-allocate), -1);
/* Large allocation with stack probing: call the allocate_stack worker,
   which expects the size in eax.  */
6352 /* Only valid for Win32. */
6353 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6357 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6359 if (TARGET_64BIT_MS_ABI)
/* If eax holds an incoming argument, preserve it around the probe call.  */
6362 eax_live = ix86_eax_live_at_start_p ();
6366 emit_insn (gen_push (eax));
6367 allocate -= UNITS_PER_WORD;
6370 emit_move_insn (eax, GEN_INT (allocate));
6373 insn = gen_allocate_stack_worker_64 (eax);
6375 insn = gen_allocate_stack_worker_32 (eax);
6376 insn = emit_insn (insn);
6377 RTX_FRAME_RELATED_P (insn) = 1;
/* Attach a note describing the net sp change for the unwinder, since the
   worker call hides it.  */
6378 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6379 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6380 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6381 t, REG_NOTES (insn));
/* Restore the saved eax value from its slot in the new frame.  */
6385 if (frame_pointer_needed)
6386 t = plus_constant (hard_frame_pointer_rtx,
6389 - frame.nregs * UNITS_PER_WORD);
6391 t = plus_constant (stack_pointer_rtx, allocate);
6392 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Non-red-zone move-based register save happens after the allocation.  */
6396 if (frame.save_regs_using_mov
6397 && !(TARGET_RED_ZONE
6398 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6400 if (!frame_pointer_needed || !frame.to_allocate)
6401 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6403 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6404 -frame.nregs * UNITS_PER_WORD);
/* Set up the PIC register if this function needs the GOT.  */
6407 pic_reg_used = false;
6408 if (pic_offset_table_rtx
6409 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6410 || current_function_profile))
6412 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
/* Retarget the PIC pseudo at a free call-clobbered register if one
   was found.  */
6414 if (alt_pic_reg_used != INVALID_REGNUM)
6415 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6417 pic_reg_used = true;
/* Large-model PIC: materialize rip and the GOT offset separately,
   via r11 as scratch.  */
6424 if (ix86_cmodel == CM_LARGE_PIC)
6426 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6427 rtx label = gen_label_rtx ();
6429 LABEL_PRESERVE_P (label) = 1;
6430 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6431 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6432 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6433 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6434 pic_offset_table_rtx, tmp_reg));
6437 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6440 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6443 /* Prevent function calls from being scheduled before the call to mcount.
6444 In the pic_reg_used case, make sure that the got load isn't deleted. */
6445 if (current_function_profile)
6448 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6449 emit_insn (gen_blockage ());
6453 /* Emit code to restore saved registers using MOV insns. First register
6454 is restored from POINTER + OFFSET. */
6456 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6457 int maybe_eh_return)
6460 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Reload each saved register from successive word slots.  */
6462 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6463 if (ix86_save_reg (regno, maybe_eh_return))
6465 /* Ensure that adjust_address won't be forced to produce pointer
6466 out of range allowed by x86-64 instruction set. */
6467 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit imm32: compute the address in r11 and restart the
   offsets from that new base.  */
6471 r11 = gen_rtx_REG (DImode, R11_REG);
6472 emit_move_insn (r11, GEN_INT (offset));
6473 emit_insn (gen_adddi3 (r11, r11, pointer));
6474 base_address = gen_rtx_MEM (Pmode, r11);
6477 emit_move_insn (gen_rtx_REG (Pmode, regno),
6478 adjust_address (base_address, Pmode, offset));
6479 offset += UNITS_PER_WORD;
6483 /* Restore function stack, frame, and registers. */
6486 ix86_expand_epilogue (int style)
/* sp is usable for addressing saved regs unless a frame pointer exists
   and sp may have moved (e.g. alloca).  */
6489 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6490 struct ix86_frame frame;
6491 HOST_WIDE_INT offset;
6493 ix86_compute_frame_layout (&frame);
6495 /* Calculate start of saved registers relative to ebp. Special care
6496 must be taken for the normal return case of a function using
6497 eh_return: the eax and edx registers are marked as saved, but not
6498 restored along this path. */
6499 offset = frame.nregs;
6500 if (current_function_calls_eh_return && style != 2)
6502 offset *= -UNITS_PER_WORD;
6504 /* If we're only restoring one register and sp is not valid then
6505 using a move instruction to restore the register since it's
6506 less work than reloading sp and popping the register.
6508 The default code result in stack adjustment using add/lea instruction,
6509 while this code results in LEAVE instruction (or discrete equivalent),
6510 so it is profitable in some other cases as well. Especially when there
6511 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6512 and there is exactly one register to pop. This heuristic may need some
6513 tuning in future. */
6514 if ((!sp_valid && frame.nregs <= 1)
6515 || (TARGET_EPILOGUE_USING_MOVE
6516 && cfun->machine->use_fast_prologue_epilogue
6517 && (frame.nregs > 1 || frame.to_allocate))
6518 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6519 || (frame_pointer_needed && TARGET_USE_LEAVE
6520 && cfun->machine->use_fast_prologue_epilogue
6521 && frame.nregs == 1)
6522 || current_function_calls_eh_return)
6524 /* Restore registers. We can use ebp or esp to address the memory
6525 locations. If both are available, default to ebp, since offsets
6526 are known to be small. Only exception is esp pointing directly to the
6527 end of block of saved registers, where we may simplify addressing
6530 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6531 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6532 frame.to_allocate, style == 2);
6534 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6535 offset, style == 2);
6537 /* eh_return epilogues need %ecx added to the stack pointer. */
6540 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
/* With a frame pointer: restore ebp from its slot and set sp from
   ebp + stackadj + wordsize.  */
6542 if (frame_pointer_needed)
6544 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6545 tmp = plus_constant (tmp, UNITS_PER_WORD);
6546 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6548 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6549 emit_move_insn (hard_frame_pointer_rtx, tmp);
6551 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer: bump sp past the frame plus stackadj.  */
6556 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6557 tmp = plus_constant (tmp, (frame.to_allocate
6558 + frame.nregs * UNITS_PER_WORD));
6559 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6562 else if (!frame_pointer_needed)
6563 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6564 GEN_INT (frame.to_allocate
6565 + frame.nregs * UNITS_PER_WORD),
6567 /* If not an i386, mov & pop is faster than "leave". */
6568 else if (TARGET_USE_LEAVE || optimize_size
6569 || !cfun->machine->use_fast_prologue_epilogue)
6570 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6573 pro_epilogue_adjust_stack (stack_pointer_rtx,
6574 hard_frame_pointer_rtx,
6577 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6579 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Pop-based epilogue path.  */
6584 /* First step is to deallocate the stack frame so that we can
6585 pop the registers. */
6588 gcc_assert (frame_pointer_needed);
6589 pro_epilogue_adjust_stack (stack_pointer_rtx,
6590 hard_frame_pointer_rtx,
6591 GEN_INT (offset), style);
6593 else if (frame.to_allocate)
6594 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6595 GEN_INT (frame.to_allocate), style);
/* Pop saved registers in ascending regno order (mirror of the pushes).  */
6597 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6598 if (ix86_save_reg (regno, false))
6601 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6603 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6605 if (frame_pointer_needed)
6607 /* Leave results in shorter dependency chains on CPUs that are
6608 able to grok it fast. */
6609 if (TARGET_USE_LEAVE)
6610 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6611 else if (TARGET_64BIT)
6612 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6614 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Undo the prologue's stack realignment by restoring sp from the saved
   fake argument pointer.  */
6618 if (cfun->machine->force_align_arg_pointer)
6620 emit_insn (gen_addsi3 (stack_pointer_rtx,
6621 cfun->machine->force_align_arg_pointer,
6625 /* Sibcall epilogues don't want a return instruction. */
6629 if (current_function_pops_args && current_function_args_size)
6631 rtx popc = GEN_INT (current_function_pops_args);
6633 /* i386 can only pop 64K bytes. If asked to pop more, pop
6634 return address, do explicit add, and jump indirectly to the
6637 if (current_function_pops_args >= 65536)
6639 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6641 /* There is no "pascal" calling convention in any 64bit ABI. */
6642 gcc_assert (!TARGET_64BIT);
6644 emit_insn (gen_popsi1 (ecx));
6645 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6646 emit_jump_insn (gen_return_indirect_internal (ecx));
6649 emit_jump_insn (gen_return_pop_internal (popc));
6652 emit_jump_insn (gen_return_internal ());
6655 /* Reset from the function's potential modifications. */
6658 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6659 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6661 if (pic_offset_table_rtx)
6662 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6664 /* Mach-O doesn't support labels at the end of objects, so if
6665 it looks like we might want one, insert a NOP. */
6667 rtx insn = get_last_insn ();
6670 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6671 insn = PREV_INSN (insn);
6675 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6676 fputs ("\tnop\n", file);
6682 /* Extract the parts of an RTL expression that is a valid memory address
6683 for an instruction. Return 0 if the structure of the address is
6684 grossly off. Return -1 if the address contains ASHIFT, so it is not
6685 strictly valid, but still used for computing length of lea instruction. */
6688 ix86_decompose_address (rtx addr, struct ix86_address *out)
6690 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6691 rtx base_reg, index_reg;
6692 HOST_WIDE_INT scale = 1;
6693 rtx scale_rtx = NULL_RTX;
6695 enum ix86_address_seg seg = SEG_DEFAULT;
6697 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6699 else if (GET_CODE (addr) == PLUS)
6709 addends[n++] = XEXP (op, 1);
6712 while (GET_CODE (op) == PLUS);
6717 for (i = n; i >= 0; --i)
6720 switch (GET_CODE (op))
6725 index = XEXP (op, 0);
6726 scale_rtx = XEXP (op, 1);
6730 if (XINT (op, 1) == UNSPEC_TP
6731 && TARGET_TLS_DIRECT_SEG_REFS
6732 && seg == SEG_DEFAULT)
6733 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6762 else if (GET_CODE (addr) == MULT)
6764 index = XEXP (addr, 0); /* index*scale */
6765 scale_rtx = XEXP (addr, 1);
6767 else if (GET_CODE (addr) == ASHIFT)
6771 /* We're called for lea too, which implements ashift on occasion. */
6772 index = XEXP (addr, 0);
6773 tmp = XEXP (addr, 1);
6774 if (!CONST_INT_P (tmp))
6776 scale = INTVAL (tmp);
6777 if ((unsigned HOST_WIDE_INT) scale > 3)
6783 disp = addr; /* displacement */
6785 /* Extract the integral value of scale. */
6788 if (!CONST_INT_P (scale_rtx))
6790 scale = INTVAL (scale_rtx);
6793 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6794 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6796 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6797 if (base_reg && index_reg && scale == 1
6798 && (index_reg == arg_pointer_rtx
6799 || index_reg == frame_pointer_rtx
6800 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6803 tmp = base, base = index, index = tmp;
6804 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6807 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6808 if ((base_reg == hard_frame_pointer_rtx
6809 || base_reg == frame_pointer_rtx
6810 || base_reg == arg_pointer_rtx) && !disp)
6813 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6814 Avoid this by transforming to [%esi+0]. */
6815 if (TARGET_K6 && !optimize_size
6816 && base_reg && !index_reg && !disp
6818 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6821 /* Special case: encode reg+reg instead of reg*2. */
6822 if (!base && index && scale && scale == 2)
6823 base = index, base_reg = index_reg, scale = 1;
6825 /* Special case: scaling cannot be encoded without base or displacement. */
6826 if (!base && !disp && index && scale != 1)
6838 /* Return cost of the memory address x.
6839 For i386, it is better to use a complex address than let gcc copy
6840 the address into a reg and make a new pseudo. But not if the address
6841 requires to two regs - that would mean more pseudos with longer
6844 ix86_address_cost (rtx x)
6846 struct ix86_address parts;
6848 int ok = ix86_decompose_address (x, &parts);
6852 if (parts.base && GET_CODE (parts.base) == SUBREG)
6853 parts.base = SUBREG_REG (parts.base);
6854 if (parts.index && GET_CODE (parts.index) == SUBREG)
6855 parts.index = SUBREG_REG (parts.index);
6857 /* Attempt to minimize number of registers in the address. */
6859 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6861 && (!REG_P (parts.index)
6862 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6866 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6868 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6869 && parts.base != parts.index)
6872 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6873 since it's predecode logic can't detect the length of instructions
6874 and it degenerates to vector decoded. Increase cost of such
6875 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6876 to split such addresses or even refuse such addresses at all.
6878 Following addressing modes are affected:
6883 The first and last case may be avoidable by explicitly coding the zero in
6884 memory address, but I don't have AMD-K6 machine handy to check this
6888 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6889 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6890 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6896 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6897 this is used for to form addresses to local data when -fPIC is in
6901 darwin_local_data_pic (rtx disp)
6903 if (GET_CODE (disp) == MINUS)
6905 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6906 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6907 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6909 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6910 if (! strcmp (sym_name, "<pic base>"))
6918 /* Determine if a given RTX is a valid constant. We already know this
6919 satisfies CONSTANT_P. */
6922 legitimate_constant_p (rtx x)
6924 switch (GET_CODE (x))
6929 if (GET_CODE (x) == PLUS)
6931 if (!CONST_INT_P (XEXP (x, 1)))
6936 if (TARGET_MACHO && darwin_local_data_pic (x))
6939 /* Only some unspecs are valid as "constants". */
6940 if (GET_CODE (x) == UNSPEC)
6941 switch (XINT (x, 1))
6946 return TARGET_64BIT;
6949 x = XVECEXP (x, 0, 0);
6950 return (GET_CODE (x) == SYMBOL_REF
6951 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6953 x = XVECEXP (x, 0, 0);
6954 return (GET_CODE (x) == SYMBOL_REF
6955 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6960 /* We must have drilled down to a symbol. */
6961 if (GET_CODE (x) == LABEL_REF)
6963 if (GET_CODE (x) != SYMBOL_REF)
6968 /* TLS symbols are never valid. */
6969 if (SYMBOL_REF_TLS_MODEL (x))
6972 /* DLLIMPORT symbols are never valid. */
6973 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6974 && SYMBOL_REF_DLLIMPORT_P (x))
6979 if (GET_MODE (x) == TImode
6980 && x != CONST0_RTX (TImode)
6986 if (x == CONST0_RTX (GET_MODE (x)))
6994 /* Otherwise we handle everything else in the move patterns. */
6998 /* Determine if it's legal to put X into the constant pool. This
6999 is not possible for the address of thread-local symbols, which
7000 is checked above. */
7003 ix86_cannot_force_const_mem (rtx x)
7005 /* We can always put integral constants and vectors in memory. */
7006 switch (GET_CODE (x))
7016 return !legitimate_constant_p (x);
7019 /* Determine if a given RTX is a valid constant address. */
7022 constant_address_p (rtx x)
7024 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7027 /* Nonzero if the constant value X is a legitimate general operand
7028 when generating PIC code. It is given that flag_pic is on and
7029 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7032 legitimate_pic_operand_p (rtx x)
7036 switch (GET_CODE (x))
7039 inner = XEXP (x, 0);
7040 if (GET_CODE (inner) == PLUS
7041 && CONST_INT_P (XEXP (inner, 1)))
7042 inner = XEXP (inner, 0);
7044 /* Only some unspecs are valid as "constants". */
7045 if (GET_CODE (inner) == UNSPEC)
7046 switch (XINT (inner, 1))
7051 return TARGET_64BIT;
7053 x = XVECEXP (inner, 0, 0);
7054 return (GET_CODE (x) == SYMBOL_REF
7055 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7063 return legitimate_pic_address_disp_p (x);
7070 /* Determine if a given CONST RTX is a valid memory displacement
7074 legitimate_pic_address_disp_p (rtx disp)
7078 /* In 64bit mode we can allow direct addresses of symbols and labels
7079 when they are not dynamic symbols. */
7082 rtx op0 = disp, op1;
7084 switch (GET_CODE (disp))
7090 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7092 op0 = XEXP (XEXP (disp, 0), 0);
7093 op1 = XEXP (XEXP (disp, 0), 1);
7094 if (!CONST_INT_P (op1)
7095 || INTVAL (op1) >= 16*1024*1024
7096 || INTVAL (op1) < -16*1024*1024)
7098 if (GET_CODE (op0) == LABEL_REF)
7100 if (GET_CODE (op0) != SYMBOL_REF)
7105 /* TLS references should always be enclosed in UNSPEC. */
7106 if (SYMBOL_REF_TLS_MODEL (op0))
7108 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7109 && ix86_cmodel != CM_LARGE_PIC)
7117 if (GET_CODE (disp) != CONST)
7119 disp = XEXP (disp, 0);
7123 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7124 of GOT tables. We should not need these anyway. */
7125 if (GET_CODE (disp) != UNSPEC
7126 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7127 && XINT (disp, 1) != UNSPEC_GOTOFF
7128 && XINT (disp, 1) != UNSPEC_PLTOFF))
7131 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7132 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7138 if (GET_CODE (disp) == PLUS)
7140 if (!CONST_INT_P (XEXP (disp, 1)))
7142 disp = XEXP (disp, 0);
7146 if (TARGET_MACHO && darwin_local_data_pic (disp))
7149 if (GET_CODE (disp) != UNSPEC)
7152 switch (XINT (disp, 1))
7157 /* We need to check for both symbols and labels because VxWorks loads
7158 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7160 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7161 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7163 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7164 While ABI specify also 32bit relocation but we don't produce it in
7165 small PIC model at all. */
7166 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7167 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7169 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7171 case UNSPEC_GOTTPOFF:
7172 case UNSPEC_GOTNTPOFF:
7173 case UNSPEC_INDNTPOFF:
7176 disp = XVECEXP (disp, 0, 0);
7177 return (GET_CODE (disp) == SYMBOL_REF
7178 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7180 disp = XVECEXP (disp, 0, 0);
7181 return (GET_CODE (disp) == SYMBOL_REF
7182 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7184 disp = XVECEXP (disp, 0, 0);
7185 return (GET_CODE (disp) == SYMBOL_REF
7186 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7192 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7193 memory address for an instruction. The MODE argument is the machine mode
7194 for the MEM expression that wants to use this address.
7196 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7197 convert common non-canonical forms to canonical form so that they will
7201 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7202 rtx addr, int strict)
7204 struct ix86_address parts;
7205 rtx base, index, disp;
7206 HOST_WIDE_INT scale;
7207 const char *reason = NULL;
7208 rtx reason_rtx = NULL_RTX;
7210 if (ix86_decompose_address (addr, &parts) <= 0)
7212 reason = "decomposition failed";
7217 index = parts.index;
7219 scale = parts.scale;
7221 /* Validate base register.
7223 Don't allow SUBREG's that span more than a word here. It can lead to spill
7224 failures when the base is one word out of a two word structure, which is
7225 represented internally as a DImode int. */
7234 else if (GET_CODE (base) == SUBREG
7235 && REG_P (SUBREG_REG (base))
7236 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7238 reg = SUBREG_REG (base);
7241 reason = "base is not a register";
7245 if (GET_MODE (base) != Pmode)
7247 reason = "base is not in Pmode";
7251 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7252 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7254 reason = "base is not valid";
7259 /* Validate index register.
7261 Don't allow SUBREG's that span more than a word here -- same as above. */
7270 else if (GET_CODE (index) == SUBREG
7271 && REG_P (SUBREG_REG (index))
7272 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7274 reg = SUBREG_REG (index);
7277 reason = "index is not a register";
7281 if (GET_MODE (index) != Pmode)
7283 reason = "index is not in Pmode";
7287 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7288 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7290 reason = "index is not valid";
7295 /* Validate scale factor. */
7298 reason_rtx = GEN_INT (scale);
7301 reason = "scale without index";
7305 if (scale != 2 && scale != 4 && scale != 8)
7307 reason = "scale is not a valid multiplier";
7312 /* Validate displacement. */
7317 if (GET_CODE (disp) == CONST
7318 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7319 switch (XINT (XEXP (disp, 0), 1))
7321 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7322 used. While ABI specify also 32bit relocations, we don't produce
7323 them at all and use IP relative instead. */
7326 gcc_assert (flag_pic);
7328 goto is_legitimate_pic;
7329 reason = "64bit address unspec";
7332 case UNSPEC_GOTPCREL:
7333 gcc_assert (flag_pic);
7334 goto is_legitimate_pic;
7336 case UNSPEC_GOTTPOFF:
7337 case UNSPEC_GOTNTPOFF:
7338 case UNSPEC_INDNTPOFF:
7344 reason = "invalid address unspec";
7348 else if (SYMBOLIC_CONST (disp)
7352 && MACHOPIC_INDIRECT
7353 && !machopic_operand_p (disp)
7359 if (TARGET_64BIT && (index || base))
7361 /* foo@dtpoff(%rX) is ok. */
7362 if (GET_CODE (disp) != CONST
7363 || GET_CODE (XEXP (disp, 0)) != PLUS
7364 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7365 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7366 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7367 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7369 reason = "non-constant pic memory reference";
7373 else if (! legitimate_pic_address_disp_p (disp))
7375 reason = "displacement is an invalid pic construct";
7379 /* This code used to verify that a symbolic pic displacement
7380 includes the pic_offset_table_rtx register.
7382 While this is good idea, unfortunately these constructs may
7383 be created by "adds using lea" optimization for incorrect
7392 This code is nonsensical, but results in addressing
7393 GOT table with pic_offset_table_rtx base. We can't
7394 just refuse it easily, since it gets matched by
7395 "addsi3" pattern, that later gets split to lea in the
7396 case output register differs from input. While this
7397 can be handled by separate addsi pattern for this case
7398 that never results in lea, this seems to be easier and
7399 correct fix for crash to disable this test. */
7401 else if (GET_CODE (disp) != LABEL_REF
7402 && !CONST_INT_P (disp)
7403 && (GET_CODE (disp) != CONST
7404 || !legitimate_constant_p (disp))
7405 && (GET_CODE (disp) != SYMBOL_REF
7406 || !legitimate_constant_p (disp)))
7408 reason = "displacement is not constant";
7411 else if (TARGET_64BIT
7412 && !x86_64_immediate_operand (disp, VOIDmode))
7414 reason = "displacement is out of range";
7419 /* Everything looks valid. */
7426 /* Return a unique alias set for the GOT. */
7428 static alias_set_type
7429 ix86_GOT_alias_set (void)
7431 static alias_set_type set = -1;
7433 set = new_alias_set ();
7437 /* Return a legitimate reference for ORIG (an address) using the
7438 register REG. If REG is 0, a new pseudo is generated.
7440 There are two types of references that must be handled:
7442 1. Global data references must load the address from the GOT, via
7443 the PIC reg. An insn is emitted to do this load, and the reg is
7446 2. Static data references, constant pool addresses, and code labels
7447 compute the address as an offset from the GOT, whose base is in
7448 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7449 differentiate them from global data objects. The returned
7450 address is the PIC reg + an unspec constant.
7452 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7453 reg also appears in the address. */
7456 legitimize_pic_address (rtx orig, rtx reg)
7463 if (TARGET_MACHO && !TARGET_64BIT)
7466 reg = gen_reg_rtx (Pmode);
7467 /* Use the generic Mach-O PIC machinery. */
7468 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7472 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7474 else if (TARGET_64BIT
7475 && ix86_cmodel != CM_SMALL_PIC
7476 && gotoff_operand (addr, Pmode))
7479 /* This symbol may be referenced via a displacement from the PIC
7480 base address (@GOTOFF). */
7482 if (reload_in_progress)
7483 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7484 if (GET_CODE (addr) == CONST)
7485 addr = XEXP (addr, 0);
7486 if (GET_CODE (addr) == PLUS)
7488 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7490 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7493 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7494 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7496 tmpreg = gen_reg_rtx (Pmode);
7499 emit_move_insn (tmpreg, new_rtx);
7503 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7504 tmpreg, 1, OPTAB_DIRECT);
7507 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7509 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7511 /* This symbol may be referenced via a displacement from the PIC
7512 base address (@GOTOFF). */
7514 if (reload_in_progress)
7515 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7516 if (GET_CODE (addr) == CONST)
7517 addr = XEXP (addr, 0);
7518 if (GET_CODE (addr) == PLUS)
7520 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7522 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7525 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7526 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7527 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7531 emit_move_insn (reg, new_rtx);
7535 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7536 /* We can't use @GOTOFF for text labels on VxWorks;
7537 see gotoff_operand. */
7538 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7540 /* Given that we've already handled dllimport variables separately
7541 in legitimize_address, and all other variables should satisfy
7542 legitimate_pic_address_disp_p, we should never arrive here. */
7543 gcc_assert (!TARGET_64BIT_MS_ABI);
7545 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7547 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7548 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7549 new_rtx = gen_const_mem (Pmode, new_rtx);
7550 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7553 reg = gen_reg_rtx (Pmode);
7554 /* Use directly gen_movsi, otherwise the address is loaded
7555 into register for CSE. We don't want to CSE this addresses,
7556 instead we CSE addresses from the GOT table, so skip this. */
7557 emit_insn (gen_movsi (reg, new_rtx));
7562 /* This symbol must be referenced via a load from the
7563 Global Offset Table (@GOT). */
7565 if (reload_in_progress)
7566 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7567 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7568 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7570 new_rtx = force_reg (Pmode, new_rtx);
7571 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7572 new_rtx = gen_const_mem (Pmode, new_rtx);
7573 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7576 reg = gen_reg_rtx (Pmode);
7577 emit_move_insn (reg, new_rtx);
7583 if (CONST_INT_P (addr)
7584 && !x86_64_immediate_operand (addr, VOIDmode))
7588 emit_move_insn (reg, addr);
7592 new_rtx = force_reg (Pmode, addr);
7594 else if (GET_CODE (addr) == CONST)
7596 addr = XEXP (addr, 0);
7598 /* We must match stuff we generate before. Assume the only
7599 unspecs that can get here are ours. Not that we could do
7600 anything with them anyway.... */
7601 if (GET_CODE (addr) == UNSPEC
7602 || (GET_CODE (addr) == PLUS
7603 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7605 gcc_assert (GET_CODE (addr) == PLUS);
7607 if (GET_CODE (addr) == PLUS)
7609 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7611 /* Check first to see if this is a constant offset from a @GOTOFF
7612 symbol reference. */
7613 if (gotoff_operand (op0, Pmode)
7614 && CONST_INT_P (op1))
7618 if (reload_in_progress)
7619 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7620 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7622 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7623 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7624 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7628 emit_move_insn (reg, new_rtx);
7634 if (INTVAL (op1) < -16*1024*1024
7635 || INTVAL (op1) >= 16*1024*1024)
7637 if (!x86_64_immediate_operand (op1, Pmode))
7638 op1 = force_reg (Pmode, op1);
7639 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7645 base = legitimize_pic_address (XEXP (addr, 0), reg);
7646 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7647 base == reg ? NULL_RTX : reg);
7649 if (CONST_INT_P (new_rtx))
7650 new_rtx = plus_constant (base, INTVAL (new_rtx));
7653 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7655 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7656 new_rtx = XEXP (new_rtx, 1);
7658 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7666 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7669 get_thread_pointer (int to_reg)
7673 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7677 reg = gen_reg_rtx (Pmode);
7678 insn = gen_rtx_SET (VOIDmode, reg, tp);
7679 insn = emit_insn (insn);
7684 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7685 false if we expect this to be used for a memory address and true if
7686 we expect to load the address into a register. */
7689 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7691 rtx dest, base, off, pic, tp;
7696 case TLS_MODEL_GLOBAL_DYNAMIC:
7697 dest = gen_reg_rtx (Pmode);
7698 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7700 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7702 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7705 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7706 insns = get_insns ();
7709 CONST_OR_PURE_CALL_P (insns) = 1;
7710 emit_libcall_block (insns, dest, rax, x);
7712 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7713 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7715 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7717 if (TARGET_GNU2_TLS)
7719 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7721 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7725 case TLS_MODEL_LOCAL_DYNAMIC:
7726 base = gen_reg_rtx (Pmode);
7727 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7729 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7731 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7734 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7735 insns = get_insns ();
7738 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7739 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7740 CONST_OR_PURE_CALL_P (insns) = 1;
7741 emit_libcall_block (insns, base, rax, note);
7743 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7744 emit_insn (gen_tls_local_dynamic_base_64 (base));
7746 emit_insn (gen_tls_local_dynamic_base_32 (base));
7748 if (TARGET_GNU2_TLS)
7750 rtx x = ix86_tls_module_base ();
7752 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7753 gen_rtx_MINUS (Pmode, x, tp));
7756 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7757 off = gen_rtx_CONST (Pmode, off);
7759 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7761 if (TARGET_GNU2_TLS)
7763 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7765 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7770 case TLS_MODEL_INITIAL_EXEC:
7774 type = UNSPEC_GOTNTPOFF;
7778 if (reload_in_progress)
7779 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7780 pic = pic_offset_table_rtx;
7781 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7783 else if (!TARGET_ANY_GNU_TLS)
7785 pic = gen_reg_rtx (Pmode);
7786 emit_insn (gen_set_got (pic));
7787 type = UNSPEC_GOTTPOFF;
7792 type = UNSPEC_INDNTPOFF;
7795 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7796 off = gen_rtx_CONST (Pmode, off);
7798 off = gen_rtx_PLUS (Pmode, pic, off);
7799 off = gen_const_mem (Pmode, off);
7800 set_mem_alias_set (off, ix86_GOT_alias_set ());
7802 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7804 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7805 off = force_reg (Pmode, off);
7806 return gen_rtx_PLUS (Pmode, base, off);
7810 base = get_thread_pointer (true);
7811 dest = gen_reg_rtx (Pmode);
7812 emit_insn (gen_subsi3 (dest, base, off));
7816 case TLS_MODEL_LOCAL_EXEC:
7817 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7818 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7819 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7820 off = gen_rtx_CONST (Pmode, off);
7822 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7824 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7825 return gen_rtx_PLUS (Pmode, base, off);
7829 base = get_thread_pointer (true);
7830 dest = gen_reg_rtx (Pmode);
7831 emit_insn (gen_subsi3 (dest, base, off));
7842 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7845 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7846 htab_t dllimport_map;
7849 get_dllimport_decl (tree decl)
7851 struct tree_map *h, in;
7855 size_t namelen, prefixlen;
7861 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7863 in.hash = htab_hash_pointer (decl);
7864 in.base.from = decl;
7865 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7866 h = (struct tree_map *) *loc;
7870 *loc = h = GGC_NEW (struct tree_map);
7872 h->base.from = decl;
7873 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7874 DECL_ARTIFICIAL (to) = 1;
7875 DECL_IGNORED_P (to) = 1;
7876 DECL_EXTERNAL (to) = 1;
7877 TREE_READONLY (to) = 1;
7879 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7880 name = targetm.strip_name_encoding (name);
7881 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
7882 namelen = strlen (name);
7883 prefixlen = strlen (prefix);
7884 imp_name = (char *) alloca (namelen + prefixlen + 1);
7885 memcpy (imp_name, prefix, prefixlen);
7886 memcpy (imp_name + prefixlen, name, namelen + 1);
7888 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7889 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7890 SET_SYMBOL_REF_DECL (rtl, to);
7891 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7893 rtl = gen_const_mem (Pmode, rtl);
7894 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7896 SET_DECL_RTL (to, rtl);
7897 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
7902 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7903 true if we require the result be a register. */
7906 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7911 gcc_assert (SYMBOL_REF_DECL (symbol));
7912 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7914 x = DECL_RTL (imp_decl);
7916 x = force_reg (Pmode, x);
7920 /* Try machine-dependent ways of modifying an illegitimate address
7921 to be legitimate. If we find one, return the new, valid address.
7922 This macro is used in only one place: `memory_address' in explow.c.
7924 OLDX is the address as it was before break_out_memory_refs was called.
7925 In some cases it is useful to look at this to decide what needs to be done.
7927 MODE and WIN are passed so that this macro can use
7928 GO_IF_LEGITIMATE_ADDRESS.
7930 It is always safe for this macro to do nothing. It exists to recognize
7931 opportunities to optimize the output.
7933 For the 80386, we handle X+REG by loading X into a register R and
7934 using R+REG. R will go in a general reg and indexing will be used.
7935 However, if REG is a broken-out memory address or multiplication,
7936 nothing needs to be done because REG can certainly go in a general reg.
7938 When -fpic is used, special handling is needed for symbolic references.
7939 See comments by legitimize_pic_address in i386.c for details. */
7942 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7947 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7949 return legitimize_tls_address (x, (enum tls_model) log, false);
7950 if (GET_CODE (x) == CONST
7951 && GET_CODE (XEXP (x, 0)) == PLUS
7952 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7953 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7955 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7956 (enum tls_model) log, false);
7957 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7960 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7962 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7963 return legitimize_dllimport_symbol (x, true);
7964 if (GET_CODE (x) == CONST
7965 && GET_CODE (XEXP (x, 0)) == PLUS
7966 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7967 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7969 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7970 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7974 if (flag_pic && SYMBOLIC_CONST (x))
7975 return legitimize_pic_address (x, 0);
7977 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7978 if (GET_CODE (x) == ASHIFT
7979 && CONST_INT_P (XEXP (x, 1))
7980 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7983 log = INTVAL (XEXP (x, 1));
7984 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7985 GEN_INT (1 << log));
7988 if (GET_CODE (x) == PLUS)
7990 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7992 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7993 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7994 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7997 log = INTVAL (XEXP (XEXP (x, 0), 1));
7998 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7999 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8000 GEN_INT (1 << log));
8003 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8004 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8005 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8008 log = INTVAL (XEXP (XEXP (x, 1), 1));
8009 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8010 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8011 GEN_INT (1 << log));
8014 /* Put multiply first if it isn't already. */
8015 if (GET_CODE (XEXP (x, 1)) == MULT)
8017 rtx tmp = XEXP (x, 0);
8018 XEXP (x, 0) = XEXP (x, 1);
8023 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8024 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8025 created by virtual register instantiation, register elimination, and
8026 similar optimizations. */
8027 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8030 x = gen_rtx_PLUS (Pmode,
8031 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8032 XEXP (XEXP (x, 1), 0)),
8033 XEXP (XEXP (x, 1), 1));
8037 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8038 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8039 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8040 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8041 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8042 && CONSTANT_P (XEXP (x, 1)))
8045 rtx other = NULL_RTX;
8047 if (CONST_INT_P (XEXP (x, 1)))
8049 constant = XEXP (x, 1);
8050 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8052 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8054 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8055 other = XEXP (x, 1);
8063 x = gen_rtx_PLUS (Pmode,
8064 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8065 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8066 plus_constant (other, INTVAL (constant)));
8070 if (changed && legitimate_address_p (mode, x, FALSE))
8073 if (GET_CODE (XEXP (x, 0)) == MULT)
8076 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8079 if (GET_CODE (XEXP (x, 1)) == MULT)
8082 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8086 && REG_P (XEXP (x, 1))
8087 && REG_P (XEXP (x, 0)))
8090 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8093 x = legitimize_pic_address (x, 0);
8096 if (changed && legitimate_address_p (mode, x, FALSE))
8099 if (REG_P (XEXP (x, 0)))
8101 rtx temp = gen_reg_rtx (Pmode);
8102 rtx val = force_operand (XEXP (x, 1), temp);
8104 emit_move_insn (temp, val);
8110 else if (REG_P (XEXP (x, 1)))
8112 rtx temp = gen_reg_rtx (Pmode);
8113 rtx val = force_operand (XEXP (x, 0), temp);
8115 emit_move_insn (temp, val);
8125 /* Print an integer constant expression in assembler syntax. Addition
8126 and subtraction are the only arithmetic that may appear in these
8127 expressions. FILE is the stdio stream to write to, X is the rtx, and
8128 CODE is the operand print code from the output string. */
/* NOTE(review): this extracted chunk is missing interleaved lines (case
   labels, braces, break statements); the switch structure below is read
   from the visible arms only.  */
8131 output_pic_addr_const (FILE *file, rtx x, int code)
8135 switch (GET_CODE (x))
8138 gcc_assert (flag_pic);
8143 if (! TARGET_MACHO || TARGET_64BIT)
8144 output_addr_const (file, x);
8147 const char *name = XSTR (x, 0);
8149 /* Mark the decl as referenced so that cgraph will
8150 output the function. */
8151 if (SYMBOL_REF_DECL (x))
8152 mark_decl_referenced (SYMBOL_REF_DECL (x));
8155 if (MACHOPIC_INDIRECT
8156 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8157 name = machopic_indirection_name (x, /*stub_p=*/true);
8159 assemble_name (file, name);
/* Non-local symbols referenced via 'P' get a @PLT suffix on ELF-style
   targets (not Mach-O, not the 64-bit MS ABI).  */
8161 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8162 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8163 fputs ("@PLT", file);
8170 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8171 assemble_name (asm_out_file, buf);
8175 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8179 /* This used to output parentheses around the expression,
8180 but that does not work on the 386 (either ATT or BSD assembler). */
8181 output_pic_addr_const (file, XEXP (x, 0), code);
8185 if (GET_MODE (x) == VOIDmode)
8187 /* We can use %d if the number is <32 bits and positive. */
8188 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8189 fprintf (file, "0x%lx%08lx",
8190 (unsigned long) CONST_DOUBLE_HIGH (x),
8191 (unsigned long) CONST_DOUBLE_LOW (x))Y;
8193 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8196 /* We can't handle floating point constants;
8197 PRINT_OPERAND must handle them. */
8198 output_operand_lossage ("floating constant misused");
8202 /* Some assemblers need integer constants to appear first. */
8203 if (CONST_INT_P (XEXP (x, 0)))
8205 output_pic_addr_const (file, XEXP (x, 0), code);
8207 output_pic_addr_const (file, XEXP (x, 1), code);
8211 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8212 output_pic_addr_const (file, XEXP (x, 1), code);
8214 output_pic_addr_const (file, XEXP (x, 0), code);
8220 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8221 output_pic_addr_const (file, XEXP (x, 0), code);
8223 output_pic_addr_const (file, XEXP (x, 1), code);
8225 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: a one-element vector tagged with a relocation kind.  Print
   the wrapped expression, then the matching @-relocation suffix.  */
8229 gcc_assert (XVECLEN (x, 0) == 1);
8230 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8231 switch (XINT (x, 1))
8234 fputs ("@GOT", file);
8237 fputs ("@GOTOFF", file);
8240 fputs ("@PLTOFF", file);
8242 case UNSPEC_GOTPCREL:
8243 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8244 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8246 case UNSPEC_GOTTPOFF:
8247 /* FIXME: This might be @TPOFF in Sun ld too. */
8248 fputs ("@GOTTPOFF", file);
8251 fputs ("@TPOFF", file);
8255 fputs ("@TPOFF", file);
8257 fputs ("@NTPOFF", file);
8260 fputs ("@DTPOFF", file);
8262 case UNSPEC_GOTNTPOFF:
8264 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8265 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8267 fputs ("@GOTNTPOFF", file);
8269 case UNSPEC_INDNTPOFF:
8270 fputs ("@INDNTPOFF", file);
8273 output_operand_lossage ("invalid UNSPEC as operand");
8279 output_operand_lossage ("invalid expression as operand");
8283 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8284 We need to emit DTP-relative relocations. */
/* Emits an ASM_LONG directive, the constant, and a @DTPOFF relocation
   suffix.  The trailing ", 0" is presumably the high word for a larger
   SIZE — TODO confirm against the missing switch on SIZE.  */
8286 static void ATTRIBUTE_UNUSED
8287 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8289 fputs (ASM_LONG, file);
8290 output_addr_const (file, x);
8291 fputs ("@DTPOFF", file);
8297 fputs (", 0", file);
8304 /* In the name of slightly smaller debug output, and to cater to
8305 general assembler lossage, recognize PIC+GOTOFF and turn it back
8306 into a direct symbol reference.
8308 On Darwin, this is necessary to avoid a crash, because Darwin
8309 has a different PIC label for each routine but the DWARF debugging
8310 information is not associated with any particular routine, so it's
8311 necessary to remove references to the PIC label from RTL stored by
8312 the DWARF output code. */
8315 ix86_delegitimize_address (rtx orig_x)
8318 /* reg_addend is NULL or a multiple of some register. */
8319 rtx reg_addend = NULL_RTX;
8320 /* const_addend is NULL or a const_int. */
8321 rtx const_addend = NULL_RTX;
8322 /* This is the result, or NULL. */
8323 rtx result = NULL_RTX;
/* 64-bit case (presumably — the guarding condition is not visible):
   strip a (const (unspec [sym] UNSPEC_GOTPCREL)) wrapper.  */
8330 if (GET_CODE (x) != CONST
8331 || GET_CODE (XEXP (x, 0)) != UNSPEC
8332 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8335 return XVECEXP (XEXP (x, 0), 0, 0);
8338 if (GET_CODE (x) != PLUS
8339 || GET_CODE (XEXP (x, 1)) != CONST)
8342 if (REG_P (XEXP (x, 0))
8343 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8344 /* %ebx + GOT/GOTOFF */
8346 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8348 /* %ebx + %reg * scale + GOT/GOTOFF */
8349 reg_addend = XEXP (x, 0);
/* Peel the PIC register off either side of the inner PLUS; what
   remains must be a register or a scaled index (MULT/ASHIFT).  */
8350 if (REG_P (XEXP (reg_addend, 0))
8351 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8352 reg_addend = XEXP (reg_addend, 1);
8353 else if (REG_P (XEXP (reg_addend, 1))
8354 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8355 reg_addend = XEXP (reg_addend, 0);
8358 if (!REG_P (reg_addend)
8359 && GET_CODE (reg_addend) != MULT
8360 && GET_CODE (reg_addend) != ASHIFT)
8366 x = XEXP (XEXP (x, 1), 0);
8367 if (GET_CODE (x) == PLUS
8368 && CONST_INT_P (XEXP (x, 1)))
8370 const_addend = XEXP (x, 1);
8374 if (GET_CODE (x) == UNSPEC
8375 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8376 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8377 result = XVECEXP (x, 0, 0);
8379 if (TARGET_MACHO && darwin_local_data_pic (x)
8381 result = XEXP (x, 0);
/* Re-attach any stripped constant and register addends.  */
8387 result = gen_rtx_PLUS (Pmode, result, const_addend);
8389 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8393 /* If X is a machine specific address (i.e. a symbol or label being
8394 referenced as a displacement from the GOT implemented using an
8395 UNSPEC), then return the base term. Otherwise return X. */
8398 ix86_find_base_term (rtx x)
/* First path: unwrap (const (plus (unspec ... UNSPEC_GOTPCREL) off))
   down to the SYMBOL_REF/LABEL_REF inside the UNSPEC.  */
8404 if (GET_CODE (x) != CONST)
8407 if (GET_CODE (term) == PLUS
8408 && (CONST_INT_P (XEXP (term, 1))
8409 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8410 term = XEXP (term, 0);
8411 if (GET_CODE (term) != UNSPEC
8412 || XINT (term, 1) != UNSPEC_GOTPCREL)
8415 term = XVECEXP (term, 0, 0);
8417 if (GET_CODE (term) != SYMBOL_REF
8418 && GET_CODE (term) != LABEL_REF)
/* Fallback path: let ix86_delegitimize_address do the unwrapping and
   accept its result only if it is a symbol or label.  */
8424 term = ix86_delegitimize_address (x);
8426 if (GET_CODE (term) != SYMBOL_REF
8427 && GET_CODE (term) != LABEL_REF)
/* Write the condition-code suffix (e.g. "a", "ae", "p") for CODE in
   CC-mode MODE to FILE; REVERSE selects the reversed condition and (per
   the fcmov note below) an fp-specific spelling is chosen when needed.
   NOTE(review): the switch arms and several parameters are missing from
   this extracted chunk.  */
8434 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8439 if (mode == CCFPmode || mode == CCFPUmode)
8441 enum rtx_code second_code, bypass_code;
8442 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8443 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8444 code = ix86_fp_compare_code_to_integer (code);
8448 code = reverse_condition (code);
8499 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8503 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8504 Those same assemblers have the same but opposite lossage on cmov. */
8506 suffix = fp ? "nbe" : "a";
8507 else if (mode == CCCmode)
8530 gcc_assert (mode == CCmode || mode == CCCmode);
8552 gcc_assert (mode == CCmode || mode == CCCmode);
8553 suffix = fp ? "nb" : "ae";
8556 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8563 else if (mode == CCCmode)
8564 suffix = fp ? "nb" : "ae";
8569 suffix = fp ? "u" : "p";
8572 suffix = fp ? "nu" : "np";
8577 fputs (suffix, file);
8580 /* Print the name of register X to FILE based on its machine mode and number.
8581 If CODE is 'w', pretend the mode is HImode.
8582 If CODE is 'b', pretend the mode is QImode.
8583 If CODE is 'k', pretend the mode is SImode.
8584 If CODE is 'q', pretend the mode is DImode.
8585 If CODE is 'h', pretend the reg is the 'high' byte register.
8586 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8589 print_reg (rtx x, int code, FILE *file)
8591 gcc_assert (x == pc_rtx
8592 || (REGNO (x) != ARG_POINTER_REGNUM
8593 && REGNO (x) != FRAME_POINTER_REGNUM
8594 && REGNO (x) != FLAGS_REG
8595 && REGNO (x) != FPSR_REG
8596 && REGNO (x) != FPCR_REG));
8598 if (ASSEMBLER_DIALECT == ASM_ATT)
8603 gcc_assert (TARGET_64BIT);
8604 fputs ("rip", file);
/* Translate the override letter into an operand size in bytes; with no
   override, take the size from the operand's own mode.  */
8608 if (code == 'w' || MMX_REG_P (x))
8610 else if (code == 'b')
8612 else if (code == 'k')
8614 else if (code == 'q')
8616 else if (code == 'y')
8618 else if (code == 'h')
8621 code = GET_MODE_SIZE (GET_MODE (x));
8623 /* Irritatingly, AMD extended registers use different naming convention
8624 from the normal registers. */
8625 if (REX_INT_REG_P (x))
8627 gcc_assert (TARGET_64BIT);
8631 error ("extended registers have no high halves");
8634 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8637 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8640 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8643 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8646 error ("unsupported operand size for extended register");
8654 if (STACK_TOP_P (x))
8656 fputs ("st(0)", file);
/* Non-FP registers of size 4/8 get an 'e'/'r' prefix respectively
   ("eax" vs "rax"); the name tables below hold the bare names.  */
8663 if (! ANY_FP_REG_P (x))
8664 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8669 fputs (hi_reg_name[REGNO (x)], file);
8672 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8674 fputs (qi_reg_name[REGNO (x)], file);
8677 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8679 fputs (qi_high_reg_name[REGNO (x)], file);
8686 /* Locate some local-dynamic symbol still in use by this function
8687 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: records the first local-dynamic TLS
   SYMBOL_REF's name in cfun->machine->some_ld_name.  */
8691 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8695 if (GET_CODE (x) == SYMBOL_REF
8696 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8698 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return a cached local-dynamic TLS symbol name for this function, or
   scan the insn stream (via get_some_local_dynamic_name_1) to find one.  */
8706 get_some_local_dynamic_name (void)
8710 if (cfun->machine->some_ld_name)
8711 return cfun->machine->some_ld_name;
8713 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8715 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8716 return cfun->machine->some_ld_name;
8722 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8723 C -- print opcode suffix for set/cmov insn.
8724 c -- like C, but print reversed condition
8725 F,f -- likewise, but for floating-point.
8726 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8728 R -- print the prefix for register names.
8729 z -- print the opcode suffix for the size of the current operand.
8730 * -- print a star (in certain assembler syntax)
8731 A -- print an absolute memory reference.
8732 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8733 s -- print a shift double count, followed by the assemblers argument
8735 b -- print the QImode name of the register for the indicated operand.
8736 %b0 would print %al if operands[0] is reg 0.
8737 w -- likewise, print the HImode name of the register.
8738 k -- likewise, print the SImode name of the register.
8739 q -- likewise, print the DImode name of the register.
8740 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8741 y -- print "st(0)" instead of "st" as a register.
8742 D -- print condition for SSE cmp instruction.
8743 P -- if PIC, print an @PLT suffix.
8744 X -- don't print any sort of PIC '@' suffix for a symbol.
8745 & -- print some in-use local-dynamic symbol name.
8746 H -- print a memory address offset by 8; used for sse high-parts
8747 Y -- print condition for SSE5 com* instruction.
8748 + -- print a branch hint as 'cs' or 'ds' prefix
8749 ; -- print a semicolon (after prefixes due to bug in older gas).
/* NOTE(review): the case labels of the big switch over CODE are missing
   from this extracted chunk; each run of lines below is one case body.  */
8753 print_operand (FILE *file, rtx x, int code)
8760 if (ASSEMBLER_DIALECT == ASM_ATT)
8765 assemble_name (file, get_some_local_dynamic_name ());
8769 switch (ASSEMBLER_DIALECT)
8776 /* Intel syntax. For absolute addresses, registers should not
8777 be surrounded by braces. */
8781 PRINT_OPERAND (file, x, 0);
8791 PRINT_OPERAND (file, x, 0);
8796 if (ASSEMBLER_DIALECT == ASM_ATT)
8801 if (ASSEMBLER_DIALECT == ASM_ATT)
8806 if (ASSEMBLER_DIALECT == ASM_ATT)
8811 if (ASSEMBLER_DIALECT == ASM_ATT)
8816 if (ASSEMBLER_DIALECT == ASM_ATT)
8821 if (ASSEMBLER_DIALECT == ASM_ATT)
8826 /* 387 opcodes don't get size suffixes if the operands are
8828 if (STACK_REG_P (x))
8831 /* Likewise if using Intel opcodes. */
8832 if (ASSEMBLER_DIALECT == ASM_INTEL)
8835 /* This is the size of op from size of operand. */
8836 switch (GET_MODE_SIZE (GET_MODE (x)))
8845 #ifdef HAVE_GAS_FILDS_FISTS
8855 if (GET_MODE (x) == SFmode)
8870 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8872 #ifdef GAS_MNEMONICS
8898 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8900 PRINT_OPERAND (file, x, 0);
8906 /* Little bit of braindamage here. The SSE compare instructions
8907 does use completely different names for the comparisons that the
8908 fp conditional moves. */
8909 switch (GET_CODE (x))
8924 fputs ("unord", file);
8928 fputs ("neq", file);
8932 fputs ("nlt", file);
8936 fputs ("nle", file);
8939 fputs ("ord", file);
8946 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8947 if (ASSEMBLER_DIALECT == ASM_ATT)
8949 switch (GET_MODE (x))
8951 case HImode: putc ('w', file); break;
8953 case SFmode: putc ('l', file); break;
8955 case DFmode: putc ('q', file); break;
8956 default: gcc_unreachable ();
8963 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8966 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8967 if (ASSEMBLER_DIALECT == ASM_ATT)
8970 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8973 /* Like above, but reverse condition */
8975 /* Check to see if argument to %c is really a constant
8976 and not a condition code which needs to be reversed. */
8977 if (!COMPARISON_P (x))
8979 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8982 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8985 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8986 if (ASSEMBLER_DIALECT == ASM_ATT)
8989 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8993 /* It doesn't actually matter what mode we use here, as we're
8994 only going to use this for printing. */
8995 x = adjust_address_nv (x, DImode, 8);
/* '+' branch hint: emit a ds/cs segment-override prefix only when the
   recorded branch probability disagrees with the static (forward-not-
   taken) prediction the CPU would make.  */
9002 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9005 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9008 int pred_val = INTVAL (XEXP (x, 0));
9010 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9011 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9013 int taken = pred_val > REG_BR_PROB_BASE / 2;
9014 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9016 /* Emit hints only in the case default branch prediction
9017 heuristics would fail. */
9018 if (taken != cputaken)
9020 /* We use 3e (DS) prefix for taken branches and
9021 2e (CS) prefix for not taken branches. */
9023 fputs ("ds ; ", file);
9025 fputs ("cs ; ", file);
9033 switch (GET_CODE (x))
9036 fputs ("neq", file);
9043 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9047 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9058 fputs ("unord", file);
9061 fputs ("ord", file);
9064 fputs ("ueq", file);
9067 fputs ("nlt", file);
9070 fputs ("nle", file);
9073 fputs ("ule", file);
9076 fputs ("ult", file);
9079 fputs ("une", file);
9088 fputs (" ; ", file);
9095 output_operand_lossage ("invalid operand code '%c'", code);
9100 print_reg (x, code, file);
9104 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9105 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9106 && GET_MODE (x) != BLKmode)
9109 switch (GET_MODE_SIZE (GET_MODE (x)))
9111 case 1: size = "BYTE"; break;
9112 case 2: size = "WORD"; break;
9113 case 4: size = "DWORD"; break;
9114 case 8: size = "QWORD"; break;
9115 case 12: size = "XWORD"; break;
9117 if (GET_MODE (x) == XFmode)
9126 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9129 else if (code == 'w')
9131 else if (code == 'k')
9135 fputs (" PTR ", file);
9139 /* Avoid (%rip) for call operands. */
9140 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9141 && !CONST_INT_P (x))
9142 output_addr_const (file, x);
9143 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9144 output_operand_lossage ("invalid constraints for operand");
9149 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9154 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9155 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9157 if (ASSEMBLER_DIALECT == ASM_ATT)
9159 fprintf (file, "0x%08lx", l);
9162 /* These float cases don't actually occur as immediate operands. */
9163 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9167 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9168 fprintf (file, "%s", dstr);
9171 else if (GET_CODE (x) == CONST_DOUBLE
9172 && GET_MODE (x) == XFmode)
9176 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9177 fprintf (file, "%s", dstr);
9182 /* We have patterns that allow zero sets of memory, for instance.
9183 In 64-bit mode, we should probably support all 8-byte vectors,
9184 since we can in fact encode that into an immediate. */
9185 if (GET_CODE (x) == CONST_VECTOR)
9187 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9193 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9195 if (ASSEMBLER_DIALECT == ASM_ATT)
9198 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9199 || GET_CODE (x) == LABEL_REF)
9201 if (ASSEMBLER_DIALECT == ASM_ATT)
9204 fputs ("OFFSET FLAT:", file);
9207 if (CONST_INT_P (x))
9208 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9210 output_pic_addr_const (file, x, code);
9212 output_addr_const (file, x);
9216 /* Print a memory operand whose address is ADDR. */
9219 print_operand_address (FILE *file, rtx addr)
9221 struct ix86_address parts;
9222 rtx base, index, disp;
9224 int ok = ix86_decompose_address (addr, &parts);
9229 index = parts.index;
9231 scale = parts.scale;
/* Non-default segment override (fs:/gs:) goes first in AT&T syntax.  */
9239 if (ASSEMBLER_DIALECT == ASM_ATT)
9241 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9247 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9248 if (TARGET_64BIT && !base && !index)
/* Only a symbol/label displacement (no TLS model) may be made
   RIP-relative; a CONST (plus sym off) wrapper is looked through.  */
9252 if (GET_CODE (disp) == CONST
9253 && GET_CODE (XEXP (disp, 0)) == PLUS
9254 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9255 symbol = XEXP (XEXP (disp, 0), 0);
9257 if (GET_CODE (symbol) == LABEL_REF
9258 || (GET_CODE (symbol) == SYMBOL_REF
9259 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9262 if (!base && !index)
9264 /* Displacement only requires special attention. */
9266 if (CONST_INT_P (disp))
9268 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9269 fputs ("ds:", file);
9270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9273 output_pic_addr_const (file, disp, 0);
9275 output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
9279 if (ASSEMBLER_DIALECT == ASM_ATT)
9284 output_pic_addr_const (file, disp, 0);
9285 else if (GET_CODE (disp) == LABEL_REF)
9286 output_asm_label (disp);
9288 output_addr_const (file, disp);
9293 print_reg (base, 0, file);
9297 print_reg (index, 0, file);
9299 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp], symbol printed first with the
   integer offset split out and signed explicitly.  */
9305 rtx offset = NULL_RTX;
9309 /* Pull out the offset of a symbol; print any symbol itself. */
9310 if (GET_CODE (disp) == CONST
9311 && GET_CODE (XEXP (disp, 0)) == PLUS
9312 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9314 offset = XEXP (XEXP (disp, 0), 1);
9315 disp = gen_rtx_CONST (VOIDmode,
9316 XEXP (XEXP (disp, 0), 0));
9320 output_pic_addr_const (file, disp, 0);
9321 else if (GET_CODE (disp) == LABEL_REF)
9322 output_asm_label (disp);
9323 else if (CONST_INT_P (disp))
9326 output_addr_const (file, disp);
9332 print_reg (base, 0, file);
9335 if (INTVAL (offset) >= 0)
9337 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9341 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9348 print_reg (index, 0, file);
9350 fprintf (file, "*%d", scale);
/* Print UNSPEC-wrapped TLS address constants that output_addr_const
   cannot handle itself: the wrapped operand followed by the proper
   @-relocation suffix.  Non-UNSPEC rtxes are rejected (see first test).  */
9358 output_addr_const_extra (FILE *file, rtx x)
9362 if (GET_CODE (x) != UNSPEC)
9365 op = XVECEXP (x, 0, 0);
9366 switch (XINT (x, 1))
9368 case UNSPEC_GOTTPOFF:
9369 output_addr_const (file, op);
9370 /* FIXME: This might be @TPOFF in Sun ld. */
9371 fputs ("@GOTTPOFF", file);
9374 output_addr_const (file, op);
9375 fputs ("@TPOFF", file);
9378 output_addr_const (file, op);
9380 fputs ("@TPOFF", file);
9382 fputs ("@NTPOFF", file);
9385 output_addr_const (file, op);
9386 fputs ("@DTPOFF", file);
9388 case UNSPEC_GOTNTPOFF:
9389 output_addr_const (file, op);
9391 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9392 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9394 fputs ("@GOTNTPOFF", file);
9396 case UNSPEC_INDNTPOFF:
9397 output_addr_const (file, op);
9398 fputs ("@INDNTPOFF", file);
9408 /* Split one or more DImode RTL references into pairs of SImode
9409 references. The RTL can be REG, offsettable MEM, integer constant, or
9410 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9411 split and "num" is its length. lo_half and hi_half are output arrays
9412 that parallel "operands". */
9415 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9419 rtx op = operands[num];
9421 /* simplify_subreg refuse to split volatile memory addresses,
9422 but we still have to handle it. */
/* MEM path: address arithmetic at byte offsets 0 and 4.  */
9425 lo_half[num] = adjust_address (op, SImode, 0);
9426 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM path: subregs; VOIDmode constants are treated as DImode.  */
9430 lo_half[num] = simplify_gen_subreg (SImode, op,
9431 GET_MODE (op) == VOIDmode
9432 ? DImode : GET_MODE (op), 0);
9433 hi_half[num] = simplify_gen_subreg (SImode, op,
9434 GET_MODE (op) == VOIDmode
9435 ? DImode : GET_MODE (op), 4);
9439 /* Split one or more TImode RTL references into pairs of DImode
9440 references. The RTL can be REG, offsettable MEM, integer constant, or
9441 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9442 split and "num" is its length. lo_half and hi_half are output arrays
9443 that parallel "operands". */
9446 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9450 rtx op = operands[num];
9452 /* simplify_subreg refuse to split volatile memory addresses, but we
9453 still have to handle it. */
/* Same scheme as split_di, but DImode halves at offsets 0 and 8.  */
9456 lo_half[num] = adjust_address (op, DImode, 0);
9457 hi_half[num] = adjust_address (op, DImode, 8);
9461 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9462 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9467 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9468 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9469 is the expression of the binary operation. The output may either be
9470 emitted here, or returned to the caller, like all output_* functions.
9472 There is no guarantee that the operands are the same mode, as they
9473 might be within FLOAT or FLOAT_EXTEND expressions. */
9475 #ifndef SYSV386_COMPAT
9476 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9477 wants to fix the assemblers because that causes incompatibility
9478 with gcc. No-one wants to fix gcc because that causes
9479 incompatibility with assemblers... You can use the option of
9480 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9481 #define SYSV386_COMPAT 1
9485 output_387_binary_op (rtx insn, rtx *operands)
9487 static char buf[30];
9490 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9492 #ifdef ENABLE_CHECKING
9493 /* Even if we do not want to check the inputs, this documents input
9494 constraints. Which helps in understanding the following code. */
9495 if (STACK_REG_P (operands[0])
9496 && ((REG_P (operands[1])
9497 && REGNO (operands[0]) == REGNO (operands[1])
9498 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9499 || (REG_P (operands[2])
9500 && REGNO (operands[0]) == REGNO (operands[2])
9501 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9502 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9505 gcc_assert (is_sse);
/* First switch: pick the base mnemonic; fi* forms are used when one
   source operand is integer (MODE_INT).  */
9508 switch (GET_CODE (operands[3]))
9511 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9512 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9520 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9521 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9529 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9530 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9538 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9539 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE scalar case: append the ss/sd form and operand template.  */
9553 if (GET_MODE (operands[0]) == SFmode)
9554 strcat (buf, "ss\t{%2, %0|%0, %2}");
9556 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 case: pick the operand template, which depends on whether the
   op is commutative, where the memory operand is, and which stack reg
   dies (the popping p/rp forms).  */
9561 switch (GET_CODE (operands[3]))
9565 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9567 rtx temp = operands[2];
9568 operands[2] = operands[1];
9572 /* know operands[0] == operands[1]. */
9574 if (MEM_P (operands[2]))
9580 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9582 if (STACK_TOP_P (operands[0]))
9583 /* How is it that we are storing to a dead operand[2]?
9584 Well, presumably operands[1] is dead too. We can't
9585 store the result to st(0) as st(0) gets popped on this
9586 instruction. Instead store to operands[2] (which I
9587 think has to be st(1)). st(1) will be popped later.
9588 gcc <= 2.8.1 didn't have this check and generated
9589 assembly code that the Unixware assembler rejected. */
9590 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9592 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9596 if (STACK_TOP_P (operands[0]))
9597 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9599 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9604 if (MEM_P (operands[1]))
9610 if (MEM_P (operands[2]))
9616 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9619 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9620 derived assemblers, confusingly reverse the direction of
9621 the operation for fsub{r} and fdiv{r} when the
9622 destination register is not st(0). The Intel assembler
9623 doesn't have this brain damage. Read !SYSV386_COMPAT to
9624 figure out what the hardware really does. */
9625 if (STACK_TOP_P (operands[0]))
9626 p = "{p\t%0, %2|rp\t%2, %0}";
9628 p = "{rp\t%2, %0|p\t%0, %2}";
9630 if (STACK_TOP_P (operands[0]))
9631 /* As above for fmul/fadd, we can't store to st(0). */
9632 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9634 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9639 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9642 if (STACK_TOP_P (operands[0]))
9643 p = "{rp\t%0, %1|p\t%1, %0}";
9645 p = "{p\t%1, %0|rp\t%0, %1}";
9647 if (STACK_TOP_P (operands[0]))
9648 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9650 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9655 if (STACK_TOP_P (operands[0]))
9657 if (STACK_TOP_P (operands[1]))
9658 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9660 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9663 else if (STACK_TOP_P (operands[1]))
9666 p = "{\t%1, %0|r\t%0, %1}";
9668 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9674 p = "{r\t%2, %0|\t%0, %2}";
9676 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9689 /* Return needed mode for entity in optimize_mode_switching pass. */
9692 ix86_mode_needed (int entity, rtx insn)
9694 enum attr_i387_cw mode;
9696 /* The mode UNINITIALIZED is used to store control word after a
9697 function call or ASM pattern. The mode ANY specify that function
9698 has no requirements on the control word and make no changes in the
9699 bits we are interested in. */
9702 || (NONJUMP_INSN_P (insn)
9703 && (asm_noperands (PATTERN (insn)) >= 0
9704 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9705 return I387_CW_UNINITIALIZED;
/* Unrecognizable insns cannot carry an i387_cw attribute.  */
9707 if (recog_memoized (insn) < 0)
9710 mode = get_attr_i387_cw (insn);
/* Dispatch on the insn's declared rounding-mode requirement; the
   per-entity return values are in the missing interleaved lines.  */
9715 if (mode == I387_CW_TRUNC)
9720 if (mode == I387_CW_FLOOR)
9725 if (mode == I387_CW_CEIL)
9730 if (mode == I387_CW_MASK_PM)
9741 /* Output code to initialize control word copies used by trunc?f?i and
9742 rounding patterns. CURRENT_MODE is set to current control word,
9743 while NEW_MODE is set to new control word. */
9746 emit_i387_cw_initialization (int mode)
9748 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9751 enum ix86_stack_slot slot;
9753 rtx reg = gen_reg_rtx (HImode);
/* Capture the current FPU control word with fnstcw, then derive the
   modified copy in REG.  */
9755 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9756 emit_move_insn (reg, copy_rtx (stored_mode));
/* Two strategies: plain and/or on targets where HImode insv is costly
   (64-bit, partial-reg-stall, or size optimization); otherwise insert
   the rounding-control nibble directly with movsi_insv_1.  */
9758 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9763 /* round toward zero (truncate) */
9764 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9765 slot = SLOT_CW_TRUNC;
9769 /* round down toward -oo */
9770 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9771 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9772 slot = SLOT_CW_FLOOR;
9776 /* round up toward +oo */
9777 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9778 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9779 slot = SLOT_CW_CEIL;
9782 case I387_CW_MASK_PM:
9783 /* mask precision exception for nearbyint() */
9784 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9785 slot = SLOT_CW_MASK_PM;
9797 /* round toward zero (truncate) */
9798 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9799 slot = SLOT_CW_TRUNC;
9803 /* round down toward -oo */
9804 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9805 slot = SLOT_CW_FLOOR;
9809 /* round up toward +oo */
9810 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9811 slot = SLOT_CW_CEIL;
9814 case I387_CW_MASK_PM:
9815 /* mask precision exception for nearbyint() */
9816 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9817 slot = SLOT_CW_MASK_PM;
/* Store the derived control word into its dedicated stack slot so the
   fldcw emitted by the mode-switching pass can load it.  */
9825 gcc_assert (slot < MAX_386_STACK_LOCALS);
9827 new_mode = assign_386_stack_local (HImode, slot);
9828 emit_move_insn (new_mode, reg);
9831 /* Output code for INSN to convert a float to a signed int. OPERANDS
9832 are the insn operands. The output may be [HSD]Imode and the input
9833 operand may be [SDX]Fmode. */
/* Return/emit the assembler text for an x87 float->integer truncation.
   FISTTP is nonzero when the SSE3 fisttp instruction may be used.
   NOTE(review): listing is elided -- braces and the trailing return are
   missing between the numbered lines.  */
9836 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9838 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9839 int dimode_p = GET_MODE (operands[0]) == DImode;
9840 int round_mode = get_attr_i387_cw (insn);
9842 /* Jump through a hoop or two for DImode, since the hardware has no
9843 non-popping instruction. We used to do this a different way, but
9844 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate the stack top so the popping fistp/fisttp leaves it live.  */
9845 if ((dimode_p || fisttp) && !stack_top_dies)
9846 output_asm_insn ("fld\t%y1", operands);
9848 gcc_assert (STACK_TOP_P (operands[1]));
9849 gcc_assert (MEM_P (operands[0]));
9850 gcc_assert (GET_MODE (operands[1]) != TFmode);
9853 output_asm_insn ("fisttp%z0\t%0", operands);
/* Non-fisttp path: swap in the truncating control word around the
   store, restoring the caller's control word afterwards.  */
9856 if (round_mode != I387_CW_ANY)
9857 output_asm_insn ("fldcw\t%3", operands);
9858 if (stack_top_dies || dimode_p)
9859 output_asm_insn ("fistp%z0\t%0", operands);
9861 output_asm_insn ("fist%z0\t%0", operands);
9862 if (round_mode != I387_CW_ANY)
9863 output_asm_insn ("fldcw\t%2", operands);
9869 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9870 have the values zero or one, indicates the ffreep insn's operand
9871 from the OPERANDS array. */
/* Return assembler text that frees x87 stack register OPNO (0 or 1 --
   see the comment above).  Falls back to emitting the raw opcode bytes
   when the assembler lacks ffreep support, and to fstp when the target
   should not use ffreep at all.  NOTE(review): listing is elided.  */
9874 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9876 if (TARGET_USE_FFREEP)
9877 #if HAVE_AS_IX86_FFREEP
9878 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler has no ffreep mnemonic: patch the st(N) register number
   into a hand-encoded .word opcode.  */
9881 static char retval[] = ".word\t0xc_df";
9882 int regno = REGNO (operands[opno]);
9884 gcc_assert (FP_REGNO_P (regno));
9886 retval[9] = '0' + (regno - FIRST_STACK_REG);
9891 return opno ? "fstp\t%y1" : "fstp\t%y0";
9895 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9896 should be used. UNORDERED_P is true when fucom should be used. */
/* Return/emit assembler text for a floating-point compare of OPERANDS.
   EFLAGS_P selects the fcomi family (result straight to EFLAGS);
   UNORDERED_P selects the fucom family.  Handles SSE comis/ucomis,
   x87 ftst against zero, double-popping fcompp, and a table-driven
   general case.  NOTE(review): listing is elided -- braces, else arms
   and the final table lookup/return are missing.  */
9899 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9902 rtx cmp_op0, cmp_op1;
9903 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9907 cmp_op0 = operands[0];
9908 cmp_op1 = operands[1];
9912 cmp_op0 = operands[1];
9913 cmp_op1 = operands[2];
/* SSE compares: [u]comiss for SFmode, [u]comisd otherwise.  */
9918 if (GET_MODE (operands[0]) == SFmode)
9920 return "ucomiss\t{%1, %0|%0, %1}";
9922 return "comiss\t{%1, %0|%0, %1}";
9925 return "ucomisd\t{%1, %0|%0, %1}";
9927 return "comisd\t{%1, %0|%0, %1}";
9930 gcc_assert (STACK_TOP_P (cmp_op0));
9932 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against literal zero: use ftst, freeing the stack top if it
   dies with this insn.  */
9934 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9938 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9939 return output_387_ffreep (operands, 1);
9942 return "ftst\n\tfnstsw\t%0";
9945 if (STACK_REG_P (cmp_op1)
9947 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9948 && REGNO (cmp_op1) != FIRST_STACK_REG)
9950 /* If both the top of the 387 stack dies, and the other operand
9951 is also a stack register that dies, then this must be a
9952 `fcompp' float compare */
9956 /* There is no double popping fcomi variant. Fortunately,
9957 eflags is immune from the fstp's cc clobbering. */
9959 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9961 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9962 return output_387_ffreep (operands, 0);
9967 return "fucompp\n\tfnstsw\t%0";
9969 return "fcompp\n\tfnstsw\t%0";
9974 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9976 static const char * const alt[16] =
9978 "fcom%z2\t%y2\n\tfnstsw\t%0",
9979 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9980 "fucom%z2\t%y2\n\tfnstsw\t%0",
9981 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9983 "ficom%z2\t%y2\n\tfnstsw\t%0",
9984 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9988 "fcomi\t{%y1, %0|%0, %y1}",
9989 "fcomip\t{%y1, %0|%0, %y1}",
9990 "fucomi\t{%y1, %0|%0, %y1}",
9991 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into alt[] per the encoding comment above.  */
10002 mask = eflags_p << 3;
10003 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10004 mask |= unordered_p << 1;
10005 mask |= stack_top_dies;
10007 gcc_assert (mask < 16);
/* Write one absolute jump-table entry (".long LNNN", or ".quad" --
   presumably under 64-bit; the guarding condition is elided) to FILE.
   VALUE is the label number.  */
10016 ix86_output_addr_vec_elt (FILE *file, int value)
10018 const char *directive = ASM_LONG;
10022 directive = ASM_QUAD;
10024 gcc_assert (!TARGET_64BIT);
10027 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Write one relative jump-table entry to FILE: the difference between
   label VALUE and base label REL, or a GOTOFF/Mach-O/GOT-relative form
   for PIC.  NOTE(review): listing is elided -- braces and some else
   lines are missing.  */
10031 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10033 const char *directive = ASM_LONG;
10036 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10037 directive = ASM_QUAD;
10039 gcc_assert (!TARGET_64BIT);
10041 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10042 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10043 fprintf (file, "%s%s%d-%s%d\n",
10044 directive, LPREFIX, value, LPREFIX, rel);
10045 else if (HAVE_AS_GOTOFF_IN_DATA)
10046 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10048 else if (TARGET_MACHO)
10050 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10051 machopic_output_function_base_name (file);
10052 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
10056 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10057 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10060 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit RTL that zeroes DEST, choosing between "mov $0, reg" and the
   flag-clobbering "xor reg, reg" form (see the comment above).
   NOTE(review): listing is elided -- braces and the final emit are
   missing.  */
10064 ix86_expand_clear (rtx dest)
10068 /* We play register width games, which are only valid after reload. */
10069 gcc_assert (reload_completed);
10071 /* Avoid HImode and its attendant prefix byte. */
10072 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10073 dest = gen_rtx_REG (SImode, REGNO (dest));
10074 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10076 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10077 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* xor form clobbers the flags, so wrap the SET in a PARALLEL with an
   explicit FLAGS_REG clobber.  */
10079 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10080 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10086 /* X is an unchanging MEM. If it is a constant pool reference, return
10087 the constant pool rtx, else NULL. */
/* X is an unchanging MEM (see comment above).  Strip any PIC
   legitimization from its address; if what remains is a constant-pool
   SYMBOL_REF, return the pool constant, else (elided) NULL.  */
10090 maybe_get_pool_constant (rtx x)
10092 x = ix86_delegitimize_address (XEXP (x, 0));
10094 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10095 return get_pool_constant (x);
/* Expand a scalar move in MODE from operands[1] to operands[0],
   legitimizing TLS, dllimport and PIC references, forcing awkward
   immediates and FP constants into registers/memory as needed, then
   emitting the final SET.  NOTE(review): listing is heavily elided --
   op0/op1 initialization, braces and several condition lines are
   missing between the numbered lines.  */
10101 ix86_expand_move (enum machine_mode mode, rtx operands[])
10104 enum tls_model model;
/* Legitimize a plain SYMBOL_REF source: TLS first, then dllimport.  */
10109 if (GET_CODE (op1) == SYMBOL_REF)
10111 model = SYMBOL_REF_TLS_MODEL (op1);
10114 op1 = legitimize_tls_address (op1, model, true);
10115 op1 = force_operand (op1, op0);
10119 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10120 && SYMBOL_REF_DLLIMPORT_P (op1))
10121 op1 = legitimize_dllimport_symbol (op1, false);
/* Same treatment for (const (plus symbol addend)) sources: legitimize
   the symbol part, then re-add the addend.  */
10123 else if (GET_CODE (op1) == CONST
10124 && GET_CODE (XEXP (op1, 0)) == PLUS
10125 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10127 rtx addend = XEXP (XEXP (op1, 0), 1);
10128 rtx symbol = XEXP (XEXP (op1, 0), 0);
10131 model = SYMBOL_REF_TLS_MODEL (symbol);
10133 tmp = legitimize_tls_address (symbol, model, true);
10134 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10135 && SYMBOL_REF_DLLIMPORT_P (symbol))
10136 tmp = legitimize_dllimport_symbol (symbol, true);
10140 tmp = force_operand (tmp, NULL);
10141 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10142 op0, 1, OPTAB_DIRECT);
/* PIC: route symbolic addresses through the PIC machinery.  */
10148 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10150 if (TARGET_MACHO && !TARGET_64BIT)
10155 rtx temp = ((reload_in_progress
10156 || ((op0 && REG_P (op0))
10158 ? op0 : gen_reg_rtx (Pmode));
10159 op1 = machopic_indirect_data_reference (op1, temp);
10160 op1 = machopic_legitimize_pic_address (op1, mode,
10161 temp == op1 ? 0 : temp);
10163 else if (MACHOPIC_INDIRECT)
10164 op1 = machopic_indirect_data_reference (op1, 0);
10172 op1 = force_reg (Pmode, op1);
10173 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10175 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10176 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC: make sure operand shapes the insn patterns can't take are
   copied through registers first.  */
10185 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10186 || !push_operand (op0, mode))
10188 op1 = force_reg (mode, op1);
10190 if (push_operand (op0, mode)
10191 && ! general_no_elim_operand (op1, mode))
10192 op1 = copy_to_mode_reg (mode, op1);
10194 /* Force large constants in 64bit compilation into register
10195 to get them CSEed. */
10196 if (can_create_pseudo_p ()
10197 && (mode == DImode) && TARGET_64BIT
10198 && immediate_operand (op1, mode)
10199 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10200 && !register_operand (op0, mode)
10202 op1 = copy_to_mode_reg (mode, op1);
10204 if (can_create_pseudo_p ()
10205 && FLOAT_MODE_P (mode)
10206 && GET_CODE (op1) == CONST_DOUBLE)
10208 /* If we are loading a floating point constant to a register,
10209 force the value to memory now, since we'll get better code
10210 out the back end. */
10212 op1 = validize_mem (force_const_mem (mode, op1));
10213 if (!register_operand (op0, mode))
10215 rtx temp = gen_reg_rtx (mode);
10216 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10217 emit_move_insn (op0, temp);
10223 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move in MODE: force non-zero constants to memory,
   reroute under-aligned 32-bit TImode moves through the misalign
   expander, and otherwise emit a plain SET.
   NOTE(review): listing is elided -- braces, the tmp[] declaration and
   some early returns are missing.  */
10227 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10229 rtx op0 = operands[0], op1 = operands[1];
10230 unsigned int align = GET_MODE_ALIGNMENT (mode);
10232 /* Force constants other than zero into memory. We do not know how
10233 the instructions used to build constants modify the upper 64 bits
10234 of the register, once we have that information we may be able
10235 to handle some of them more efficiently. */
10236 if (can_create_pseudo_p ()
10237 && register_operand (op0, mode)
10238 && (CONSTANT_P (op1)
10239 || (GET_CODE (op1) == SUBREG
10240 && CONSTANT_P (SUBREG_REG (op1))))
10241 && standard_sse_constant_p (op1) <= 0)
10242 op1 = validize_mem (force_const_mem (mode, op1));
10244 /* TDmode values are passed as TImode on the stack. TImode values
10245 are moved via xmm registers, and moving them to stack can result in
10246 unaligned memory access. Use ix86_expand_vector_move_misalign()
10247 if memory operand is not aligned correctly. */
10248 if (can_create_pseudo_p ()
10249 && (mode == TImode) && !TARGET_64BIT
10250 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10251 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10255 /* ix86_expand_vector_move_misalign() does not like constants ... */
10256 if (CONSTANT_P (op1)
10257 || (GET_CODE (op1) == SUBREG
10258 && CONSTANT_P (SUBREG_REG (op1))))
10259 op1 = validize_mem (force_const_mem (mode, op1));
10261 /* ... nor both arguments in memory. */
10262 if (!register_operand (op0, mode)
10263 && !register_operand (op1, mode))
10264 op1 = force_reg (mode, op1);
10266 tmp[0] = op0; tmp[1] = op1;
10267 ix86_expand_vector_move_misalign (mode, tmp);
10271 /* Make operand1 a register if it isn't already. */
10272 if (can_create_pseudo_p ()
10273 && !register_operand (op0, mode)
10274 && !register_operand (op1, mode))
10276 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10280 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10283 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10284 straight to ix86_expand_vector_move. */
10285 /* Code generation for scalar reg-reg moves of single and double precision data:
10286 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10290 if (x86_sse_partial_reg_dependency == true)
10295 Code generation for scalar loads of double precision data:
10296 if (x86_sse_split_regs == true)
10297 movlpd mem, reg (gas syntax)
10301 Code generation for unaligned packed loads of single precision data
10302 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10303 if (x86_sse_unaligned_move_optimal)
10306 if (x86_sse_partial_reg_dependency == true)
10318 Code generation for unaligned packed loads of double precision data
10319 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10320 if (x86_sse_unaligned_move_optimal)
10323 if (x86_sse_split_regs == true)
/* Expand an unaligned SSE load or store per the strategy table in the
   comment block above (movups/movdqu vs. split half loads/stores,
   tuned by TARGET_SSE_* flags).  NOTE(review): listing is heavily
   elided -- local declarations, braces, if-conditions such as the
   optimize_size and MEM_P(op1) tests, and returns are missing.  */
10336 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10345 /* If we're optimizing for size, movups is the smallest. */
10348 op0 = gen_lowpart (V4SFmode, op0);
10349 op1 = gen_lowpart (V4SFmode, op1);
10350 emit_insn (gen_sse_movups (op0, op1));
10354 /* ??? If we have typed data, then it would appear that using
10355 movdqu is the only way to get unaligned data loaded with
10357 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10359 op0 = gen_lowpart (V16QImode, op0);
10360 op1 = gen_lowpart (V16QImode, op1);
10361 emit_insn (gen_sse2_movdqu (op0, op1));
/* Unaligned double-precision load.  */
10365 if (TARGET_SSE2 && mode == V2DFmode)
10369 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10371 op0 = gen_lowpart (V2DFmode, op0);
10372 op1 = gen_lowpart (V2DFmode, op1);
10373 emit_insn (gen_sse2_movupd (op0, op1));
10377 /* When SSE registers are split into halves, we can avoid
10378 writing to the top half twice. */
10379 if (TARGET_SSE_SPLIT_REGS)
10381 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10386 /* ??? Not sure about the best option for the Intel chips.
10387 The following would seem to satisfy; the register is
10388 entirely cleared, breaking the dependency chain. We
10389 then store to the upper half, with a dependency depth
10390 of one. A rumor has it that Intel recommends two movsd
10391 followed by an unpacklpd, but this is unconfirmed. And
10392 given that the dependency depth of the unpacklpd would
10393 still be one, I'm not sure why this would be better. */
10394 zero = CONST0_RTX (V2DFmode);
10397 m = adjust_address (op1, DFmode, 0);
10398 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10399 m = adjust_address (op1, DFmode, 8);
10400 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* Unaligned single-precision / generic load.  */
10404 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10406 op0 = gen_lowpart (V4SFmode, op0);
10407 op1 = gen_lowpart (V4SFmode, op1);
10408 emit_insn (gen_sse_movups (op0, op1));
10412 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10413 emit_move_insn (op0, CONST0_RTX (mode));
10415 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10417 if (mode != V4SFmode)
10418 op0 = gen_lowpart (V4SFmode, op0);
10419 m = adjust_address (op1, V2SFmode, 0);
10420 emit_insn (gen_sse_loadlps (op0, op0, m));
10421 m = adjust_address (op1, V2SFmode, 8);
10422 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Store side: destination is the unaligned memory operand.  */
10425 else if (MEM_P (op0))
10427 /* If we're optimizing for size, movups is the smallest. */
10430 op0 = gen_lowpart (V4SFmode, op0);
10431 op1 = gen_lowpart (V4SFmode, op1);
10432 emit_insn (gen_sse_movups (op0, op1));
10436 /* ??? Similar to above, only less clear because of quote
10437 typeless stores unquote. */
10438 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10439 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10441 op0 = gen_lowpart (V16QImode, op0);
10442 op1 = gen_lowpart (V16QImode, op1);
10443 emit_insn (gen_sse2_movdqu (op0, op1));
10447 if (TARGET_SSE2 && mode == V2DFmode)
10449 m = adjust_address (op0, DFmode, 0);
10450 emit_insn (gen_sse2_storelpd (m, op1));
10451 m = adjust_address (op0, DFmode, 8);
10452 emit_insn (gen_sse2_storehpd (m, op1));
10456 if (mode != V4SFmode)
10457 op1 = gen_lowpart (V4SFmode, op1);
10458 m = adjust_address (op0, V2SFmode, 0);
10459 emit_insn (gen_sse_storelps (m, op1));
10460 m = adjust_address (op0, V2SFmode, 8);
10461 emit_insn (gen_sse_storehps (m, op1));
/* Neither operand in memory is not expected here.  */
10465 gcc_unreachable ();
10468 /* Expand a push in MODE. This is some mode for which we do not support
10469 proper push instructions, at least from the registers that we expect
10470 the value to live in. */
/* Expand a push of X in MODE by explicitly decrementing the stack
   pointer and storing to the new stack top (for modes with no native
   push -- see the comment above).  NOTE(review): listing is elided --
   braces and the tmp declaration are missing.  */
10473 ix86_expand_push (enum machine_mode mode, rtx x)
10477 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10478 GEN_INT (-GET_MODE_SIZE (mode)),
10479 stack_pointer_rtx, 1, OPTAB_DIRECT);
10480 if (tmp != stack_pointer_rtx)
10481 emit_move_insn (stack_pointer_rtx, tmp);
10483 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10484 emit_move_insn (tmp, x);
10487 /* Helper function of ix86_fixup_binary_operands to canonicalize
10488 operand order. Returns true if the operands should be swapped. */
/* Return true if commutative operands src1/src2 should be swapped so
   the insn constraints are easier to satisfy (dst-matching operand
   first, immediates and memory second).  NOTE(review): listing is
   elided -- braces, return statements and the final memory test are
   missing.  */
10491 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10494 rtx dst = operands[0];
10495 rtx src1 = operands[1];
10496 rtx src2 = operands[2];
10498 /* If the operation is not commutative, we can't do anything. */
10499 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10502 /* Highest priority is that src1 should match dst. */
10503 if (rtx_equal_p (dst, src1))
10505 if (rtx_equal_p (dst, src2))
10508 /* Next highest priority is that immediate constants come second. */
10509 if (immediate_operand (src2, mode))
10511 if (immediate_operand (src1, mode))
10514 /* Lowest priority is that memory references should come second. */
10524 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10525 destination to use for the operation. If different from the true
10526 destination in operands[0], a copy operation will be required. */
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok (see comment
   above): canonicalize order, break mem/mem pairs, keep constants and
   non-matching memory out of src1.  Returns the dst to use; if it
   differs from operands[0] the caller must copy back.
   NOTE(review): listing is elided -- braces, the swap body and the
   final return are missing.  */
10529 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10532 rtx dst = operands[0];
10533 rtx src1 = operands[1];
10534 rtx src2 = operands[2];
10536 /* Canonicalize operand order. */
10537 if (ix86_swap_binary_operands_p (code, mode, operands))
10544 /* Both source operands cannot be in memory. */
10545 if (MEM_P (src1) && MEM_P (src2))
10547 /* Optimization: Only read from memory once. */
10548 if (rtx_equal_p (src1, src2))
10550 src2 = force_reg (mode, src2);
10554 src2 = force_reg (mode, src2);
10557 /* If the destination is memory, and we do not have matching source
10558 operands, do things in registers. */
10559 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10560 dst = gen_reg_rtx (mode);
10562 /* Source 1 cannot be a constant. */
10563 if (CONSTANT_P (src1))
10564 src1 = force_reg (mode, src1);
10566 /* Source 1 cannot be a non-matching memory. */
10567 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10568 src1 = force_reg (mode, src1);
10570 operands[1] = src1;
10571 operands[2] = src2;
10575 /* Similarly, but assume that the destination has already been
10576 set up properly. */
/* As ix86_fixup_binary_operands, but the destination is required to
   already be acceptable -- assert that no copy-back is needed.  */
10579 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10580 enum machine_mode mode, rtx operands[])
10582 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10583 gcc_assert (dst == operands[0]);
10586 /* Attempt to expand a binary operator. Make the expansion closer to the
10587 actual machine, then just general_operand, which will allow 3 separate
10588 memory references (one output, two input) in a single insn. */
/* Expand a binary operator CODE in MODE (see comment above): fix up
   operands, emit the SET (with a FLAGS_REG clobber except during
   reload), and copy to the real destination if fixup substituted a
   temporary.  NOTE(review): listing is elided -- braces and the
   reload-path emit are missing.  */
10591 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10594 rtx src1, src2, dst, op, clob;
10596 dst = ix86_fixup_binary_operands (code, mode, operands);
10597 src1 = operands[1];
10598 src2 = operands[2];
10600 /* Emit the instruction. */
10602 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10603 if (reload_in_progress)
10605 /* Reload doesn't know about the flags register, and doesn't know that
10606 it doesn't want to clobber it. We can only do this with PLUS. */
10607 gcc_assert (code == PLUS);
10612 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10613 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10616 /* Fix up the destination if needed. */
10617 if (dst != operands[0])
10618 emit_move_insn (operands[0], dst);
10621 /* Return TRUE or FALSE depending on whether the binary operator meets the
10622 appropriate constraints. */
/* Predicate: do OPERANDS already satisfy the constraints for a binary
   CODE insn (no mem/mem, matching memory destination, no constant or
   non-matching memory in src1)?  NOTE(review): listing is elided --
   return statements and the swap body are missing.  */
10625 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10628 rtx dst = operands[0];
10629 rtx src1 = operands[1];
10630 rtx src2 = operands[2];
10632 /* Both source operands cannot be in memory. */
10633 if (MEM_P (src1) && MEM_P (src2))
10636 /* Canonicalize operand order for commutative operators. */
10637 if (ix86_swap_binary_operands_p (code, mode, operands))
10644 /* If the destination is memory, we must have a matching source operand. */
10645 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10648 /* Source 1 cannot be a constant. */
10649 if (CONSTANT_P (src1))
10652 /* Source 1 cannot be a non-matching memory. */
10653 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10659 /* Attempt to expand a unary operator. Make the expansion closer to the
10660 actual machine, then just general_operand, which will allow 2 separate
10661 memory references (one output, one input) in a single insn. */
/* Expand a unary operator CODE in MODE (see comment above): route a
   non-matching memory destination through a register, emit the SET
   (NOT gets no flags clobber; everything else does), and copy back if
   needed.  NOTE(review): listing is elided -- src/dst initialization,
   braces and the NOT-path emit are missing.  */
10664 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10667 int matching_memory;
10668 rtx src, dst, op, clob;
10673 /* If the destination is memory, and we do not have matching source
10674 operands, do things in registers. */
10675 matching_memory = 0;
10678 if (rtx_equal_p (dst, src))
10679 matching_memory = 1;
10681 dst = gen_reg_rtx (mode);
10684 /* When source operand is memory, destination must match. */
10685 if (MEM_P (src) && !matching_memory)
10686 src = force_reg (mode, src);
10688 /* Emit the instruction. */
10690 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10691 if (reload_in_progress || code == NOT)
10693 /* Reload doesn't know about the flags register, and doesn't know that
10694 it doesn't want to clobber it. */
10695 gcc_assert (code == NOT);
10700 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10701 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10704 /* Fix up the destination if needed. */
10705 if (dst != operands[0])
10706 emit_move_insn (operands[0], dst);
10709 /* Return TRUE or FALSE depending on whether the unary operator meets the
10710 appropriate constraints. */
/* Predicate: a unary operator's operands are acceptable unless one is
   memory and the two differ (in-place memory update is required).
   NOTE(review): the return statements are elided from this listing.  */
10713 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10714 enum machine_mode mode ATTRIBUTE_UNUSED,
10715 rtx operands[2] ATTRIBUTE_UNUSED)
10717 /* If one of operands is memory, source and destination must match. */
10718 if ((MEM_P (operands[0])
10719 || MEM_P (operands[1]))
10720 && ! rtx_equal_p (operands[0], operands[1]))
10725 /* Post-reload splitter for converting an SF or DFmode value in an
10726 SSE register into an unsigned SImode. */
/* Post-reload splitter (see comment above): convert an SF/DF value in
   an SSE register to unsigned SImode by conditionally subtracting 2^31
   before the signed cvtt, then xoring the sign bit back in.
   NOTE(review): listing is elided -- braces and the MEM_P(input)
   condition lines are missing.  */
10729 ix86_split_convert_uns_si_sse (rtx operands[])
10731 enum machine_mode vecmode;
10732 rtx value, large, zero_or_two31, input, two31, x;
10734 large = operands[1];
10735 zero_or_two31 = operands[2];
10736 input = operands[3];
10737 two31 = operands[4];
10738 vecmode = GET_MODE (large);
10739 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10741 /* Load up the value into the low element. We must ensure that the other
10742 elements are valid floats -- zero is the easiest such value. */
10745 if (vecmode == V4SFmode)
10746 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10748 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10752 input = gen_rtx_REG (vecmode, REGNO (input));
10753 emit_move_insn (value, CONST0_RTX (vecmode));
10754 if (vecmode == V4SFmode)
10755 emit_insn (gen_sse_movss (value, value, input));
10757 emit_insn (gen_sse2_movsd (value, value, input));
/* large = (2^31 <= value) as an all-ones/all-zeros mask; mask selects
   either 0 or 2^31 to subtract.  */
10760 emit_move_insn (large, two31);
10761 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10763 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10764 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10766 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10767 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10769 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10770 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into just the sign bit, convert, and xor it in.  */
10772 large = gen_rtx_REG (V4SImode, REGNO (large));
10773 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10775 x = gen_rtx_REG (V4SImode, REGNO (value));
10776 if (vecmode == V4SFmode)
10777 emit_insn (gen_sse2_cvttps2dq (x, value));
10779 emit_insn (gen_sse2_cvttpd2dq (x, value));
10782 emit_insn (gen_xorv4si3 (value, value, large));
10785 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10786 Expects the 64-bit DImode to be supplied in a pair of integral
10787 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10788 -mfpmath=sse, !optimize_size only. */
/* Convert an unsigned DImode value to DFmode with SSE only, using the
   exponent-bias trick described in the inline comments (see also the
   header comment above).  NOTE(review): listing is elided -- braces
   and some else lines are missing.  */
10791 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10793 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10794 rtx int_xmm, fp_xmm;
10795 rtx biases, exponents;
/* Move the 64-bit integer into an xmm register by whichever route the
   target prefers.  */
10798 int_xmm = gen_reg_rtx (V4SImode);
10799 if (TARGET_INTER_UNIT_MOVES)
10800 emit_insn (gen_movdi_to_sse (int_xmm, input));
10801 else if (TARGET_SSE_SPLIT_REGS)
10803 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10804 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10808 x = gen_reg_rtx (V2DImode);
10809 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10810 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words for 2^52 and 2^84, interleaved with the input halves
   by the punpckldq below.  */
10813 x = gen_rtx_CONST_VECTOR (V4SImode,
10814 gen_rtvec (4, GEN_INT (0x43300000UL),
10815 GEN_INT (0x45300000UL),
10816 const0_rtx, const0_rtx));
10817 exponents = validize_mem (force_const_mem (V4SImode, x));
10819 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10820 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10822 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10823 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10824 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10825 (0x1.0p84 + double(fp_value_hi_xmm)).
10826 Note these exponents differ by 32. */
10828 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10830 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10831 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10832 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10833 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10834 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10835 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10836 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10837 biases = validize_mem (force_const_mem (V2DFmode, biases));
10838 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10840 /* Add the upper and lower DFmode values together. */
10842 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
/* No SSE3 haddpd (presumably the else arm -- elided): unpack the high
   half and add explicitly.  */
10845 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10846 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10847 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10850 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10853 /* Convert an unsigned SImode value into a DFmode. Only currently used
10854 for SSE, but applicable anywhere. */
/* Convert an unsigned SImode value to DFmode (see comment above): bias
   the input into signed range by subtracting 2^31, convert with the
   signed cvt, then add 2^31 back as a double.
   NOTE(review): listing is elided -- braces and local declarations
   (x, fp) are missing.  */
10857 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10859 REAL_VALUE_TYPE TWO31r;
10862 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10863 NULL, 1, OPTAB_DIRECT);
10865 fp = gen_reg_rtx (DFmode);
10866 emit_insn (gen_floatsidf2 (fp, x));
10868 real_ldexp (&TWO31r, &dconst1, 31);
10869 x = const_double_from_real_value (TWO31r, DFmode);
10871 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10873 emit_move_insn (target, x);
10876 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10877 32-bit mode; otherwise we have a direct convert instruction. */
/* Convert a signed DImode value to DFmode for 32-bit SSE (see comment
   above): convert the signed high word, scale it by 2^32, convert the
   unsigned low word, and add the two doubles.
   NOTE(review): listing is elided -- braces and the final binop's
   trailing arguments are missing.  */
10880 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10882 REAL_VALUE_TYPE TWO32r;
10883 rtx fp_lo, fp_hi, x;
10885 fp_lo = gen_reg_rtx (DFmode);
10886 fp_hi = gen_reg_rtx (DFmode);
10888 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10890 real_ldexp (&TWO32r, &dconst1, 32);
10891 x = const_double_from_real_value (TWO32r, DFmode);
10892 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10894 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10896 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10899 emit_move_insn (target, x);
10902 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10903 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Convert an unsigned SImode value to SFmode using only SSE (see
   comment above): split into 16-bit halves, convert each exactly,
   then compute hi * 2^16 + lo.  NOTE(review): listing is elided --
   braces and some trailing argument lines are missing.  */
10905 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10907 REAL_VALUE_TYPE ONE16r;
10908 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10910 real_ldexp (&ONE16r, &dconst1, 16);
10911 x = const_double_from_real_value (ONE16r, SFmode);
10912 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10913 NULL, 0, OPTAB_DIRECT);
10914 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10915 NULL, 0, OPTAB_DIRECT);
10916 fp_hi = gen_reg_rtx (SFmode);
10917 fp_lo = gen_reg_rtx (SFmode);
10918 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10919 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10920 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10922 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10924 if (!rtx_equal_p (target, fp_hi))
10925 emit_move_insn (target, fp_hi);
10928 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10929 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR in MODE containing VALUE; when VECT, replicate
   it into every element, otherwise put it in element 0 with zeros
   elsewhere (see comment above).  NOTE(review): listing is elided --
   the switch/case scaffolding on MODE is missing.  */
10933 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10940 v = gen_rtvec (4, value, value, value, value);
10941 return gen_rtx_CONST_VECTOR (V4SImode, v);
10945 v = gen_rtvec (2, value, value);
10946 return gen_rtx_CONST_VECTOR (V2DImode, v);
10950 v = gen_rtvec (4, value, value, value, value);
10952 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10953 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10954 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10958 v = gen_rtvec (2, value, value);
10960 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10961 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10964 gcc_unreachable ();
10968 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10969 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10970 for an SSE register. If VECT is true, then replicate the mask for
10971 all elements of the vector register. If INVERT is true, then create
10972 a mask excluding the sign bit. */
/* Create a sign-bit mask register for MODE (see comment above):
   compute the sign-bit constant as a 2*HWI pair, optionally invert it,
   and return it forced into a (vector) register.
   NOTE(review): listing is elided -- the switch/case labels on MODE,
   the shift setup and some declarations are missing.  */
10975 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10977 enum machine_mode vec_mode, imode;
10978 HOST_WIDE_INT hi, lo;
10983 /* Find the sign bit, sign extended to 2*HWI. */
10989 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10990 lo = 0x80000000, hi = lo < 0;
10996 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10997 if (HOST_BITS_PER_WIDE_INT >= 64)
10998 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11000 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* Non-vector case (presumably TFmode -- the label is elided): no
   vector mode exists, return a scalar register below.  */
11006 vec_mode = VOIDmode;
11007 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11008 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11012 gcc_unreachable ();
11016 lo = ~lo, hi = ~hi;
11018 /* Force this value into the low part of a fp vector constant. */
11019 mask = immed_double_const (lo, hi, imode);
11020 mask = gen_lowpart (mode, mask);
11022 if (vec_mode == VOIDmode)
11023 return force_reg (mode, mask);
11025 v = ix86_build_const_vector (mode, vect, mask);
11026 return force_reg (vec_mode, v);
11029 /* Generate code for floating point ABS or NEG. */
/* Emit RTL for floating-point ABS or NEG (see comment above).  SSE
   paths implement them as bitwise AND/XOR with a sign-bit mask; the
   x87 path emits the operator directly with a USE of the mask and a
   flags clobber.  NOTE(review): listing is elided -- dst/src setup,
   braces and several condition lines are missing.  */
11032 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11035 rtx mask, set, use, clob, dst, src;
11036 bool use_sse = false;
11037 bool vector_mode = VECTOR_MODE_P (mode);
11038 enum machine_mode elt_mode = mode;
11042 elt_mode = GET_MODE_INNER (mode);
11045 else if (mode == TFmode)
11047 else if (TARGET_SSE_MATH)
11048 use_sse = SSE_FLOAT_MODE_P (mode);
11050 /* NEG and ABS performed with SSE use bitwise mask operations.
11051 Create the appropriate mask now. */
11053 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
/* SSE: NEG flips the sign bit (XOR), ABS clears it (AND with the
   inverted mask built above).  */
11062 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11063 set = gen_rtx_SET (VOIDmode, dst, set);
/* x87: emit the abs/neg rtx itself, keeping the mask alive via USE.  */
11068 set = gen_rtx_fmt_e (code, mode, src);
11069 set = gen_rtx_SET (VOIDmode, dst, set);
11072 use = gen_rtx_USE (VOIDmode, mask);
11073 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11074 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11075 gen_rtvec (3, set, use, clob)));
11082 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* Expand a copysign operation (see comment above).  A constant
   magnitude operand is absolute-valued and widened into a vector so
   the single-mask *_const pattern applies; otherwise the two-mask
   *_var pattern is used.  NOTE(review): listing is elided -- op0/op1
   initialization, braces and returns are missing.  */
11085 ix86_expand_copysign (rtx operands[])
11087 enum machine_mode mode, vmode;
11088 rtx dest, op0, op1, mask, nmask;
11090 dest = operands[0];
11094 mode = GET_MODE (dest);
11095 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11097 if (GET_CODE (op0) == CONST_DOUBLE)
11099 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Only the magnitude of op0 matters; drop a negative sign now.  */
11101 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11102 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11104 if (mode == SFmode || mode == DFmode)
11106 if (op0 == CONST0_RTX (mode))
11107 op0 = CONST0_RTX (vmode);
11112 if (mode == SFmode)
11113 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11114 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11116 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11117 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11121 mask = ix86_build_signbit_mask (mode, 0, 0);
11123 if (mode == SFmode)
11124 copysign_insn = gen_copysignsf3_const;
11125 else if (mode == DFmode)
11126 copysign_insn = gen_copysigndf3_const;
11128 copysign_insn = gen_copysigntf3_const;
11130 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement.  */
11134 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11136 nmask = ix86_build_signbit_mask (mode, 0, 1);
11137 mask = ix86_build_signbit_mask (mode, 0, 0);
11139 if (mode == SFmode)
11140 copysign_insn = gen_copysignsf3_var;
11141 else if (mode == DFmode)
11142 copysign_insn = gen_copysigndf3_var;
11144 copysign_insn = gen_copysigntf3_var;
11146 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11150 /* Deconstruct a copysign operation.  Operand 0 is known to
11151 be a constant, and so has already been expanded into a vector constant.  */
/* Splits the copysign*3_const pattern: dest = (dest & mask) | op0, done in
   the vector mode of the mask.
   NOTE(review): op0/op1 loads from operands[] are among the elided lines.  */
11154 ix86_split_copysign_const (rtx operands[])
11156 enum machine_mode mode, vmode;
11157 rtx dest, op0, op1, mask, x;
11159 dest = operands[0];
11162 mask = operands[3];
11164 mode = GET_MODE (dest);
11165 vmode = GET_MODE (mask);
/* Reinterpret the scalar dest as a vector and AND in the sign mask.  */
11167 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11168 x = gen_rtx_AND (vmode, dest, mask);
11169 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* OR in the (already sign-stripped) constant magnitude unless it is zero.  */
11171 if (op0 != CONST0_RTX (vmode))
11173 x = gen_rtx_IOR (vmode, dest, op0)
11174 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11178 /* Deconstruct a copysign operation.  Operand 0 is variable,
11179 so we have to do two masks.  */
/* Splits copysign*3_var: scratch = op1 & mask; dest = (op0 & ~mask) | scratch.
   The register-allocation alternatives determine which operand already sits
   in which hard register, hence the REGNO comparisons below.
   NOTE(review): several interleaved lines (operand loads, else-branch braces)
   are elided in this extract; code is kept byte-identical.  */
11182 ix86_split_copysign_var (rtx operands[])
11184 enum machine_mode mode, vmode;
11185 rtx dest, scratch, op0, op1, mask, nmask, x;
11187 dest = operands[0];
11188 scratch = operands[1];
11191 nmask = operands[4];
11192 mask = operands[5];
11194 mode = GET_MODE (dest);
11195 vmode = GET_MODE (mask);
11197 if (rtx_equal_p (op0, op1))
11199 /* Shouldn't happen often (it's useless, obviously), but when it does
11200 we'd generate incorrect code if we continue below.  */
11201 emit_move_insn (dest, op0);
11205 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11207 gcc_assert (REGNO (op1) == REGNO (scratch));
11209 x = gen_rtx_AND (vmode, scratch, mask);
11210 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest currently holds the mask; invert it to clear op0's sign bit.  */
11213 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11214 x = gen_rtx_NOT (vmode, dest);
11215 x = gen_rtx_AND (vmode, x, op0);
11216 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11220 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11222 x = gen_rtx_AND (vmode, scratch, mask);
11224 else /* alternative 2,4 */
11226 gcc_assert (REGNO (mask) == REGNO (scratch));
11227 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11228 x = gen_rtx_AND (vmode, scratch, op1);
11230 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11232 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11234 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11235 x = gen_rtx_AND (vmode, dest, nmask);
11237 else /* alternative 3,4 */
11239 gcc_assert (REGNO (nmask) == REGNO (dest));
11241 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11242 x = gen_rtx_AND (vmode, dest, op0);
11244 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine masked magnitude and masked sign.  */
11247 x = gen_rtx_IOR (vmode, dest, scratch);
11248 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11251 /* Return TRUE or FALSE depending on whether the first SET in INSN
11252 has source and destination with matching CC modes, and that the
11253 CC mode is at least as constrained as REQ_MODE.  */
/* Used by insn predicates to accept a comparison in a compatible CC mode.
   NOTE(review): the switch scaffolding (case labels / return statements)
   between the visible lines is elided in this extract.  */
11256 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11259 enum machine_mode set_mode;
11261 set = PATTERN (insn);
11262 if (GET_CODE (set) == PARALLEL)
11263 set = XVECEXP (set, 0, 0);
11264 gcc_assert (GET_CODE (set) == SET);
11265 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11267 set_mode = GET_MODE (SET_DEST (set));
/* CCmode is only acceptable for a comparison against zero unless the
   requested mode is even looser (CCNOmode).  */
11271 if (req_mode != CCNOmode
11272 && (req_mode != CCmode
11273 || XEXP (SET_SRC (set), 1) != const0_rtx))
11277 if (req_mode == CCGCmode)
11281 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11285 if (req_mode == CCZmode)
11292 gcc_unreachable ();
11295 return (GET_MODE (SET_SRC (set)) == set_mode);
11298 /* Generate insn patterns to do an integer compare of OPERANDS.  */
/* Emits FLAGS_REG = COMPARE (op0, op1) in the CC mode selected for CODE and
   returns the comparison rtx (CODE flags 0) for the flags consumer.  */
11301 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11303 enum machine_mode cmpmode;
11306 cmpmode = SELECT_CC_MODE (code, op0, op1);
11307 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11309 /* This is very simple, but making the interface the same as in the
11310 FP case makes the rest of the code easier.  */
11311 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11312 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11314 /* Return the test that should be put into the flags user, i.e.
11315 the bcc, scc, or cmov instruction.  */
11316 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11319 /* Figure out whether to use ordered or unordered fp comparisons.
11320 Return the appropriate mode to use. */
11323 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11325 /* ??? In order to make all comparisons reversible, we do all comparisons
11326 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11327 all forms trapping and nontrapping comparisons, we can make inequality
11328 comparisons trapping again, since it results in better code when using
11329 FCOM based compares. */
11330 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode needed to represent comparison CODE of
   OP0 against OP1; implements SELECT_CC_MODE for this target.
   NOTE(review): the switch statement's returns and some case bodies are
   elided in this extract; code below is kept byte-identical.  */
11334 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11336 enum machine_mode mode = GET_MODE (op0);
11338 if (SCALAR_FLOAT_MODE_P (mode))
11340 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11341 return ix86_fp_compare_mode (code);
11346 /* Only zero flag is needed.  */
11347 case EQ: /* ZF=0 */
11348 case NE: /* ZF!=0 */
11350 /* Codes needing carry flag.  */
11351 case GEU: /* CF=0 */
11352 case LTU: /* CF=1 */
11353 /* Detect overflow checks.  They need just the carry flag.  */
11354 if (GET_CODE (op0) == PLUS
11355 && rtx_equal_p (op1, XEXP (op0, 0)))
11359 case GTU: /* CF=0 & ZF=0 */
11360 case LEU: /* CF=1 | ZF=1 */
11361 /* Detect overflow checks.  They need just the carry flag.  */
11362 if (GET_CODE (op0) == MINUS
11363 && rtx_equal_p (op1, XEXP (op0, 0)))
11367 /* Codes possibly doable only with sign flag when
11368 comparing against zero.  */
11369 case GE: /* SF=OF or SF=0 */
11370 case LT: /* SF<>OF or SF=1 */
11371 if (op1 == const0_rtx)
11374 /* For other cases Carry flag is not required.  */
11376 /* Codes doable only with sign flag when comparing
11377 against zero, but we miss jump instruction for it
11378 so we need to use relational tests against overflow
11379 that thus needs to be zero.  */
11380 case GT: /* ZF=0 & SF=OF */
11381 case LE: /* ZF=1 | SF<>OF */
11382 if (op1 == const0_rtx)
11386 /* strcmp pattern do (use flags) and combine may ask us for proper
11391 gcc_unreachable ();
11395 /* Return the fixed registers used for condition codes.  */
/* Implements TARGET_FIXED_CONDITION_CODE_REGS; the body (which stores the
   flags register numbers through P1/P2) is elided in this extract.  */
11398 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11405 /* If two condition code modes are compatible, return a condition code
11406 mode which is compatible with both.  Otherwise, return
/* ...VOIDmode (per the usual TARGET_CC_MODES_COMPATIBLE contract --
   continuation of this comment is elided in the extract).  */
11409 static enum machine_mode
11410 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Identical modes and non-CC modes are handled in elided lines above.  */
11415 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC merge to the more constrained CCGCmode.  */
11418 if ((m1 == CCGCmode && m2 == CCGOCmode)
11419 || (m1 == CCGOCmode && m2 == CCGCmode))
11425 gcc_unreachable ();
11455 /* These are only compatible with themselves, which we already
11461 /* Split comparison code CODE into comparisons we can do using branch
11462 instructions.  BYPASS_CODE is comparison code for branch that will
11463 branch around FIRST_CODE and SECOND_CODE.  If some of branches
11464 is not required, set value to UNKNOWN.
11465 We never require more than two branches.  */
/* NOTE(review): the `switch (code)` line, break statements and closing
   braces are elided in this extract; the case bodies below are kept
   byte-identical.  */
11468 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11469 enum rtx_code *first_code,
11470 enum rtx_code *second_code)
11472 *first_code = code;
11473 *bypass_code = UNKNOWN;
11474 *second_code = UNKNOWN;
11476 /* The fcomi comparison sets flags as follows:
/* Codes directly representable in the unsigned flag tests set by fcomi
   need no rewriting: */
11486 case GT: /* GTU - CF=0 & ZF=0 */
11487 case GE: /* GEU - CF=0 */
11488 case ORDERED: /* PF=0 */
11489 case UNORDERED: /* PF=1 */
11490 case UNEQ: /* EQ - ZF=1 */
11491 case UNLT: /* LTU - CF=1 */
11492 case UNLE: /* LEU - CF=1 | ZF=1 */
11493 case LTGT: /* EQ - ZF=0 */
/* Signaling codes that would mis-branch on NaN get an UNORDERED bypass
   branch around the main test...  */
11495 case LT: /* LTU - CF=1 - fails on unordered */
11496 *first_code = UNLT;
11497 *bypass_code = UNORDERED;
11499 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11500 *first_code = UNLE;
11501 *bypass_code = UNORDERED;
11503 case EQ: /* EQ - ZF=1 - fails on unordered */
11504 *first_code = UNEQ;
11505 *bypass_code = UNORDERED;
/* ...while codes that must also succeed on NaN get a second UNORDERED
   branch to the same target.  */
11507 case NE: /* NE - ZF=0 - fails on unordered */
11508 *first_code = LTGT;
11509 *second_code = UNORDERED;
11511 case UNGE: /* GEU - CF=0 - fails on unordered */
11513 *second_code = UNORDERED;
11515 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11517 *second_code = UNORDERED;
11520 gcc_unreachable ();
/* Without IEEE math NaNs need not be honored, so the extra branches can
   be dropped.  */
11522 if (!TARGET_IEEE_FP)
11524 *second_code = UNKNOWN;
11525 *bypass_code = UNKNOWN;
11529 /* Return cost of comparison done fcom + arithmetics operations on AX.
11530 All following functions do use number of instructions as a cost metrics.
11531 In future this should be tweaked to compute bytes for optimize_size and
11532 take into account performance of various instructions on various CPUs.  */
/* NOTE(review): the per-code cost switch (original lines 11539-11561) is
   elided in this extract.  */
11534 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11536 if (!TARGET_IEEE_FP)
11538 /* The cost of code output by ix86_expand_fp_compare.  */
11562 gcc_unreachable ();
11566 /* Return cost of comparison done using fcomi operation.
11567 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11569 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11571 enum rtx_code bypass_code, first_code, second_code;
11572 /* Return arbitrarily high cost when instruction is not supported - this
11573 prevents gcc from using it. */
11576 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11577 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11580 /* Return cost of comparison done using sahf operation.
11581 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11583 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11585 enum rtx_code bypass_code, first_code, second_code;
11586 /* Return arbitrarily high cost when instruction is not preferred - this
11587 avoids gcc from using it. */
11588 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11590 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11591 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11594 /* Compute cost of the comparison done using any method.
11595 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11597 ix86_fp_comparison_cost (enum rtx_code code)
11599 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11602 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11603 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11605 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11606 if (min > sahf_cost)
11608 if (min > fcomi_cost)
11613 /* Return true if we should use an FCOMI instruction for this
11617 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11619 enum rtx_code swapped_code = swap_condition (code);
11621 return ((ix86_fp_comparison_cost (code)
11622 == ix86_fp_comparison_fcomi_cost (code))
11623 || (ix86_fp_comparison_cost (swapped_code)
11624 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11627 /* Swap, force into registers, or otherwise massage the two operands
11628 to a fp comparison.  The operands are updated in place; the new
11629 comparison code is returned.  */
/* NOTE(review): several lines (the SSE branch, early returns, some braces)
   are elided in this extract; code below is kept byte-identical.  */
11631 static enum rtx_code
11632 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11634 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11635 rtx op0 = *pop0, op1 = *pop1;
11636 enum machine_mode op_mode = GET_MODE (op0);
11637 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11639 /* All of the unordered compare instructions only work on registers.
11640 The same is true of the fcomi compare instructions.  The XFmode
11641 compare instructions require registers except when comparing
11642 against zero or when converting operand 1 from fixed point to
/* ...floating point (comment continues in an elided line).  */
11646 && (fpcmp_mode == CCFPUmode
11647 || (op_mode == XFmode
11648 && ! (standard_80387_constant_p (op0) == 1
11649 || standard_80387_constant_p (op1) == 1)
11650 && GET_CODE (op1) != FLOAT)
11651 || ix86_use_fcomi_compare (code)))
11653 op0 = force_reg (op_mode, op0);
11654 op1 = force_reg (op_mode, op1);
11658 /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
11659 things around if they appear profitable, otherwise force op0
11660 into a register.  */
11662 if (standard_80387_constant_p (op0) == 0
11664 && ! (standard_80387_constant_p (op1) == 0
11668 tmp = op0, op0 = op1, op1 = tmp;
11669 code = swap_condition (code);
11673 op0 = force_reg (op_mode, op0);
11675 if (CONSTANT_P (op1))
11677 int tmp = standard_80387_constant_p (op1);
/* Non-special constants are spilled to memory; special x87 constants
   (fldz/fld1 etc.) are forced into registers.  */
11679 op1 = validize_mem (force_const_mem (op_mode, op1));
11683 op1 = force_reg (op_mode, op1);
11686 op1 = force_reg (op_mode, op1);
11690 /* Try to rearrange the comparison to make it cheaper.  */
11691 if (ix86_fp_comparison_cost (code)
11692 > ix86_fp_comparison_cost (swap_condition (code))
11693 && (REG_P (op1) || can_create_pseudo_p ()))
11696 tmp = op0, op0 = op1, op1 = tmp;
11697 code = swap_condition (code);
11699 op0 = force_reg (op_mode, op0);
11707 /* Convert comparison codes we use to represent FP comparison to integer
11708 code that will result in proper branch.  Return UNKNOWN if no such code
/* ...is available (the mapping switch, original lines 11713-11740, is
   elided in this extract).  */
11712 ix86_fp_compare_code_to_integer (enum rtx_code code)
11741 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
/* Returns the comparison rtx for the flags consumer.  Two strategies:
   (1) fcomi/sahf which set EFLAGS directly, possibly with *SECOND_TEST /
   *BYPASS_TEST outputs for NaN handling; (2) fnstsw into AH followed by
   bit twiddling on the status word.
   NOTE(review): this extract elides many lines (switch scaffolding, else
   branches, several emits); code below is kept byte-identical.  */
11744 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11745 rtx *second_test, rtx *bypass_test)
11747 enum machine_mode fpcmp_mode, intcmp_mode;
11749 int cost = ix86_fp_comparison_cost (code);
11750 enum rtx_code bypass_code, first_code, second_code;
11752 fpcmp_mode = ix86_fp_compare_mode (code);
11753 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11756 *second_test = NULL_RTX;
11758 *bypass_test = NULL_RTX;
11760 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11762 /* Do fcomi/sahf based test when profitable.  */
11763 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11764 && (bypass_code == UNKNOWN || bypass_test)
11765 && (second_code == UNKNOWN || second_test))
11767 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11768 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: compare clobbers a HImode scratch for the status word.  */
11774 gcc_assert (TARGET_SAHF);
11777 scratch = gen_reg_rtx (HImode);
11778 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11780 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11783 /* The FP codes work out to act like unsigned.  */
11784 intcmp_mode = fpcmp_mode;
11786 if (bypass_code != UNKNOWN)
11787 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11788 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11790 if (second_code != UNKNOWN)
11791 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11792 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11797 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
11798 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11799 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11801 scratch = gen_reg_rtx (HImode);
11802 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11804 /* In the unordered case, we have to check C2 for NaN's, which
11805 doesn't happen to work out to anything nice combination-wise.
11806 So do some bit twiddling on the value we've got in AH to come
11807 up with an appropriate set of condition codes.  */
/* The magic masks test x87 status bits in AH: 0x45 = C0|C2|C3,
   0x40 = C3, 0x05 = C0|C2, 0x04 = C2, 0x01 = C0.  */
11809 intcmp_mode = CCNOmode;
11814 if (code == GT || !TARGET_IEEE_FP)
11816 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11821 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11822 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11823 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11824 intcmp_mode = CCmode;
11830 if (code == LT && TARGET_IEEE_FP)
11832 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11833 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11834 intcmp_mode = CCmode;
11839 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11845 if (code == GE || !TARGET_IEEE_FP)
11847 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11852 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11853 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11860 if (code == LE && TARGET_IEEE_FP)
11862 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11863 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11864 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11865 intcmp_mode = CCmode;
11870 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11876 if (code == EQ && TARGET_IEEE_FP)
11878 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11879 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11880 intcmp_mode = CCmode;
11885 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11892 if (code == NE && TARGET_IEEE_FP)
11894 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11895 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11901 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11907 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11911 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11916 gcc_unreachable ();
11920 /* Return the test that should be put into the flags user, i.e.
11921 the bcc, scc, or cmov instruction.  */
11922 return gen_rtx_fmt_ee (code, VOIDmode,
11923 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison described by the global ix86_compare_op0/op1 (or a
   pre-emitted flags value in ix86_compare_emitted) and return the rtx for
   the flags consumer, dispatching to the FP or integer expander.  */
11928 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11931 op0 = ix86_compare_op0;
11932 op1 = ix86_compare_op1;
11935 *second_test = NULL_RTX;
11937 *bypass_test = NULL_RTX;
/* A compare insn was already emitted; just consume its flags result.  */
11939 if (ix86_compare_emitted)
11941 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11942 ix86_compare_emitted = NULL_RTX;
11944 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11946 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11947 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11948 second_test, bypass_test);
11951 ret = ix86_expand_int_compare (code, op0, op1);
11956 /* Return true if the CODE will result in nontrivial jump sequence. */
11958 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11960 enum rtx_code bypass_code, first_code, second_code;
11963 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11964 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL for comparison CODE of the global
   ix86_compare_op0/op1.  Scalar modes go through ix86_expand_compare;
   FP modes may need a compound pattern; DImode/TImode (wider than a word)
   are decomposed into hi/lo word compares.
   NOTE(review): the mode switch scaffolding, gotos and some braces are
   elided in this extract; code below is kept byte-identical.  */
11968 ix86_expand_branch (enum rtx_code code, rtx label)
11972 /* If we have emitted a compare insn, go straight to simple.
11973 ix86_expand_compare won't emit anything if ix86_compare_emitted
/* ...is non-NULL (comment continues in an elided line).  */
11975 if (ix86_compare_emitted)
11978 switch (GET_MODE (ix86_compare_op0))
11984 tmp = ix86_expand_compare (code, NULL, NULL);
11985 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11986 gen_rtx_LABEL_REF (VOIDmode, label),
11988 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11997 enum rtx_code bypass_code, first_code, second_code;
11999 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12000 &ix86_compare_op1);
12002 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12004 /* Check whether we will use the natural sequence with one jump.  If
12005 so, we can expand jump early.  Otherwise delay expansion by
12006 creating compound insn to not confuse optimizers.  */
12007 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12009 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12010 gen_rtx_LABEL_REF (VOIDmode, label),
12011 pc_rtx, NULL_RTX, NULL_RTX);
12015 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12016 ix86_compare_op0, ix86_compare_op1);
12017 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12018 gen_rtx_LABEL_REF (VOIDmode, label),
12020 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12022 use_fcomi = ix86_use_fcomi_compare (code);
12023 vec = rtvec_alloc (3 + !use_fcomi);
12024 RTVEC_ELT (vec, 0) = tmp;
12026 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12028 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
/* The non-fcomi variant additionally clobbers a HImode scratch for
   fnstsw.  */
12031 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12033 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12042 /* Expand DImode branch into multiple compare+branch.  */
12044 rtx lo[2], hi[2], label2;
12045 enum rtx_code code1, code2, code3;
12046 enum machine_mode submode;
/* Canonicalize: constant goes to the second operand.  */
12048 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12050 tmp = ix86_compare_op0;
12051 ix86_compare_op0 = ix86_compare_op1;
12052 ix86_compare_op1 = tmp;
12053 code = swap_condition (code);
12055 if (GET_MODE (ix86_compare_op0) == DImode)
12057 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12058 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12063 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12064 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12068 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12069 avoid two branches.  This costs one extra insn, so disable when
12070 optimizing for size.  */
12072 if ((code == EQ || code == NE)
12074 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12079 if (hi[1] != const0_rtx)
12080 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12081 NULL_RTX, 0, OPTAB_WIDEN);
12084 if (lo[1] != const0_rtx)
12085 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12086 NULL_RTX, 0, OPTAB_WIDEN);
12088 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12089 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse as a single-word compare against zero.  */
12091 ix86_compare_op0 = tmp;
12092 ix86_compare_op1 = const0_rtx;
12093 ix86_expand_branch (code, label);
12097 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12098 op1 is a constant and the low word is zero, then we can just
12099 examine the high word.  Similarly for low word -1 and
12100 less-or-equal-than or greater-than.  */
12102 if (CONST_INT_P (hi[1]))
12105 case LT: case LTU: case GE: case GEU:
12106 if (lo[1] == const0_rtx)
12108 ix86_compare_op0 = hi[0];
12109 ix86_compare_op1 = hi[1];
12110 ix86_expand_branch (code, label);
12113 case LE: case LEU: case GT: case GTU:
12114 if (lo[1] == constm1_rtx)
12116 ix86_compare_op0 = hi[0];
12117 ix86_compare_op1 = hi[1];
12118 ix86_expand_branch (code, label);
12125 /* Otherwise, we need two or three jumps.  */
12127 label2 = gen_label_rtx ();
12130 code2 = swap_condition (code);
12131 code3 = unsigned_condition (code);
12135 case LT: case GT: case LTU: case GTU:
12138 case LE: code1 = LT; code2 = GT; break;
12139 case GE: code1 = GT; code2 = LT; break;
12140 case LEU: code1 = LTU; code2 = GTU; break;
12141 case GEU: code1 = GTU; code2 = LTU; break;
12143 case EQ: code1 = UNKNOWN; code2 = NE; break;
12144 case NE: code2 = UNKNOWN; break;
12147 gcc_unreachable ();
12152 * if (hi(a) < hi(b)) goto true;
12153 * if (hi(a) > hi(b)) goto false;
12154 * if (lo(a) < lo(b)) goto true;
12158 ix86_compare_op0 = hi[0];
12159 ix86_compare_op1 = hi[1];
12161 if (code1 != UNKNOWN)
12162 ix86_expand_branch (code1, label);
12163 if (code2 != UNKNOWN)
12164 ix86_expand_branch (code2, label2);
12166 ix86_compare_op0 = lo[0];
12167 ix86_compare_op1 = lo[1];
12168 ix86_expand_branch (code3, label);
12170 if (code2 != UNKNOWN)
12171 emit_label (label2);
12176 gcc_unreachable ();
12180 /* Split branch based on floating point condition.  */
/* Emits up to three conditional jumps (bypass / main / second) for an FP
   comparison, attaching REG_BR_PROB notes when split_branch_probability
   is known.  TARGET1/TARGET2 are the taken/fallthrough destinations;
   PUSHED, when set, is an operand spilled to memory that must be freed.
   NOTE(review): some lines (target swap body, note assignments' left-hand
   sides) are elided in this extract; code below is kept byte-identical.  */
12182 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12183 rtx target1, rtx target2, rtx tmp, rtx pushed)
12185 rtx second, bypass;
12186 rtx label = NULL_RTX;
12188 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the taken target is target1.  */
12191 if (target2 != pc_rtx)
12194 code = reverse_condition_maybe_unordered (code);
12199 condition = ix86_expand_fp_compare (code, op1, op2,
12200 tmp, &second, &bypass);
12202 /* Remove pushed operand from stack.  */
12204 ix86_free_from_memory (GET_MODE (pushed));
12206 if (split_branch_probability >= 0)
12208 /* Distribute the probabilities across the jumps.
12209 Assume the BYPASS and SECOND to be always test
/* ...for unordered values (comment continues in an elided line).  */
12211 probability = split_branch_probability;
12213 /* Value of 1 is low enough to make no need for probability
12214 to be updated.  Later we may run some experiments and see
12215 if unordered values are more frequent in practice.  */
12217 bypass_probability = 1;
12219 second_probability = 1;
12221 if (bypass != NULL_RTX)
12223 label = gen_label_rtx ();
12224 i = emit_jump_insn (gen_rtx_SET
12226 gen_rtx_IF_THEN_ELSE (VOIDmode,
12228 gen_rtx_LABEL_REF (VOIDmode,
12231 if (bypass_probability >= 0)
12233 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12234 GEN_INT (bypass_probability),
12237 i = emit_jump_insn (gen_rtx_SET
12239 gen_rtx_IF_THEN_ELSE (VOIDmode,
12240 condition, target1, target2)));
12241 if (probability >= 0)
12243 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12244 GEN_INT (probability),
12246 if (second != NULL_RTX)
12248 i = emit_jump_insn (gen_rtx_SET
12250 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12252 if (second_probability >= 0)
12254 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12255 GEN_INT (second_probability),
/* The bypass branch lands here, skipping the main/second jumps.  */
12258 if (label != NULL_RTX)
12259 emit_label (label);
/* Expand a setcc of DEST (QImode) for comparison CODE of the global
   ix86_compare_op0/op1.  Returns 1 on success, 0 to tell the caller the
   pattern FAILs (double-word modes).  When the FP compare needs a second
   or bypass test, the two QImode results are combined with AND/OR.  */
12263 ix86_expand_setcc (enum rtx_code code, rtx dest)
12265 rtx ret, tmp, tmpreg, equiv;
12266 rtx second_test, bypass_test;
12268 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12269 return 0; /* FAIL */
12271 gcc_assert (GET_MODE (dest) == QImode);
12273 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12274 PUT_MODE (ret, QImode);
12279 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12280 if (bypass_test || second_test)
12282 rtx test = second_test;
12284 rtx tmp2 = gen_reg_rtx (QImode);
/* A bypass test is combined with AND of the reversed condition; a second
   test with OR (see the emit pair below).  */
12287 gcc_assert (!second_test);
12288 test = bypass_test;
12290 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12292 PUT_MODE (test, QImode);
12293 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12296 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12298 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12301 /* Attach a REG_EQUAL note describing the comparison result.  */
12302 if (ix86_compare_op0 && ix86_compare_op1)
12304 equiv = simplify_gen_relational (code, QImode,
12305 GET_MODE (ix86_compare_op0),
12306 ix86_compare_op0, ix86_compare_op1);
12307 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12310 return 1; /* DONE */
12313 /* Expand comparison setting or clearing carry flag.  Return true when
12314 successful and set pop for the operation.  */
/* Rewrites CODE into an LTU/GEU (carry-flag) comparison when possible,
   so callers can use sbb/adc-style sequences.
   NOTE(review): the integer-case switch scaffolding and several returns
   are elided in this extract; code below is kept byte-identical.  */
12316 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12318 enum machine_mode mode =
12319 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12321 /* Do not handle DImode compares that go through special path.  */
12322 if (mode == (TARGET_64BIT ? TImode : DImode))
12325 if (SCALAR_FLOAT_MODE_P (mode))
12327 rtx second_test = NULL, bypass_test = NULL;
12328 rtx compare_op, compare_seq;
12330 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12332 /* Shortcut: following common codes never translate
12333 into carry flag compares.  */
12334 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12335 || code == ORDERED || code == UNORDERED)
12338 /* These comparisons require zero flag; swap operands so they won't.  */
12339 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12340 && !TARGET_IEEE_FP)
12345 code = swap_condition (code);
12348 /* Try to expand the comparison and verify that we end up with
12349 carry flag based comparison.  This fails to be true only when
12350 we decide to expand comparison using arithmetic that is not
12351 too common scenario.  */
/* Expansion happens into a throwaway sequence; it is only committed
   (emit_insn below) once verified to be a plain LTU/GEU flags test.  */
12353 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12354 &second_test, &bypass_test);
12355 compare_seq = get_insns ();
12358 if (second_test || bypass_test)
12361 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12362 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12363 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12365 code = GET_CODE (compare_op);
12367 if (code != LTU && code != GEU)
12370 emit_insn (compare_seq);
12375 if (!INTEGRAL_MODE_P (mode))
12384 /* Convert a==0 into (unsigned)a<1.  */
12387 if (op1 != const0_rtx)
12390 code = (code == EQ ? LTU : GEU);
12393 /* Convert a>b into b<a or a>=b-1.  */
12396 if (CONST_INT_P (op1))
12398 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12399 /* Bail out on overflow.  We still can swap operands but that
12400 would force loading of the constant into register.  */
12401 if (op1 == const0_rtx
12402 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12404 code = (code == GTU ? GEU : LTU);
12411 code = (code == GTU ? LTU : GEU);
12415 /* Convert a>=0 into (unsigned)a<0x80000000.  */
12418 if (mode == DImode || op1 != const0_rtx)
12420 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12421 code = (code == LT ? GEU : LTU);
12425 if (mode == DImode || op1 != constm1_rtx)
12427 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12428 code = (code == LE ? GEU : LTU);
12434 /* Swapping operands may cause constant to appear as first operand.  */
12435 if (!nonimmediate_operand (op0, VOIDmode))
12437 if (!can_create_pseudo_p ())
12439 op0 = force_reg (mode, op0);
12441 ix86_compare_op0 = op0;
12442 ix86_compare_op1 = op1;
12443 *pop = ix86_expand_compare (code, NULL, NULL);
12444 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12449 ix86_expand_int_movcc (rtx operands[])
12451 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12452 rtx compare_seq, compare_op;
12453 rtx second_test, bypass_test;
12454 enum machine_mode mode = GET_MODE (operands[0]);
12455 bool sign_bit_compare_p = false;;
12458 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12459 compare_seq = get_insns ();
12462 compare_code = GET_CODE (compare_op);
12464 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12465 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12466 sign_bit_compare_p = true;
12468 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12469 HImode insns, we'd be swallowed in word prefix ops. */
12471 if ((mode != HImode || TARGET_FAST_PREFIX)
12472 && (mode != (TARGET_64BIT ? TImode : DImode))
12473 && CONST_INT_P (operands[2])
12474 && CONST_INT_P (operands[3]))
12476 rtx out = operands[0];
12477 HOST_WIDE_INT ct = INTVAL (operands[2]);
12478 HOST_WIDE_INT cf = INTVAL (operands[3]);
12479 HOST_WIDE_INT diff;
12482 /* Sign bit compares are better done using shifts than we do by using
12484 if (sign_bit_compare_p
12485 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12486 ix86_compare_op1, &compare_op))
12488 /* Detect overlap between destination and compare sources. */
12491 if (!sign_bit_compare_p)
12493 bool fpcmp = false;
12495 compare_code = GET_CODE (compare_op);
12497 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12498 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12501 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12504 /* To simplify rest of code, restrict to the GEU case. */
12505 if (compare_code == LTU)
12507 HOST_WIDE_INT tmp = ct;
12510 compare_code = reverse_condition (compare_code);
12511 code = reverse_condition (code);
12516 PUT_CODE (compare_op,
12517 reverse_condition_maybe_unordered
12518 (GET_CODE (compare_op)));
12520 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12524 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12525 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12526 tmp = gen_reg_rtx (mode);
12528 if (mode == DImode)
12529 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12531 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12535 if (code == GT || code == GE)
12536 code = reverse_condition (code);
12539 HOST_WIDE_INT tmp = ct;
12544 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12545 ix86_compare_op1, VOIDmode, 0, -1);
12558 tmp = expand_simple_binop (mode, PLUS,
12560 copy_rtx (tmp), 1, OPTAB_DIRECT);
12571 tmp = expand_simple_binop (mode, IOR,
12573 copy_rtx (tmp), 1, OPTAB_DIRECT);
12575 else if (diff == -1 && ct)
12585 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12587 tmp = expand_simple_binop (mode, PLUS,
12588 copy_rtx (tmp), GEN_INT (cf),
12589 copy_rtx (tmp), 1, OPTAB_DIRECT);
12597 * andl cf - ct, dest
12607 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12610 tmp = expand_simple_binop (mode, AND,
12612 gen_int_mode (cf - ct, mode),
12613 copy_rtx (tmp), 1, OPTAB_DIRECT);
12615 tmp = expand_simple_binop (mode, PLUS,
12616 copy_rtx (tmp), GEN_INT (ct),
12617 copy_rtx (tmp), 1, OPTAB_DIRECT);
12620 if (!rtx_equal_p (tmp, out))
12621 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12623 return 1; /* DONE */
12628 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12631 tmp = ct, ct = cf, cf = tmp;
12634 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12636 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12638 /* We may be reversing unordered compare to normal compare, that
12639 is not valid in general (we may convert non-trapping condition
12640 to trapping one), however on i386 we currently emit all
12641 comparisons unordered. */
12642 compare_code = reverse_condition_maybe_unordered (compare_code);
12643 code = reverse_condition_maybe_unordered (code);
12647 compare_code = reverse_condition (compare_code);
12648 code = reverse_condition (code);
12652 compare_code = UNKNOWN;
12653 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12654 && CONST_INT_P (ix86_compare_op1))
12656 if (ix86_compare_op1 == const0_rtx
12657 && (code == LT || code == GE))
12658 compare_code = code;
12659 else if (ix86_compare_op1 == constm1_rtx)
12663 else if (code == GT)
12668 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12669 if (compare_code != UNKNOWN
12670 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12671 && (cf == -1 || ct == -1))
12673 /* If lea code below could be used, only optimize
12674 if it results in a 2 insn sequence. */
12676 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12677 || diff == 3 || diff == 5 || diff == 9)
12678 || (compare_code == LT && ct == -1)
12679 || (compare_code == GE && cf == -1))
12682 * notl op1 (if necessary)
12690 code = reverse_condition (code);
12693 out = emit_store_flag (out, code, ix86_compare_op0,
12694 ix86_compare_op1, VOIDmode, 0, -1);
12696 out = expand_simple_binop (mode, IOR,
12698 out, 1, OPTAB_DIRECT);
12699 if (out != operands[0])
12700 emit_move_insn (operands[0], out);
12702 return 1; /* DONE */
12707 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12708 || diff == 3 || diff == 5 || diff == 9)
12709 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12711 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12717 * lea cf(dest*(ct-cf)),dest
12721 * This also catches the degenerate setcc-only case.
12727 out = emit_store_flag (out, code, ix86_compare_op0,
12728 ix86_compare_op1, VOIDmode, 0, 1);
12731 /* On x86_64 the lea instruction operates on Pmode, so we need
12732 to get arithmetics done in proper mode to match. */
12734 tmp = copy_rtx (out);
12738 out1 = copy_rtx (out);
12739 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12743 tmp = gen_rtx_PLUS (mode, tmp, out1);
12749 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12752 if (!rtx_equal_p (tmp, out))
12755 out = force_operand (tmp, copy_rtx (out));
12757 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12759 if (!rtx_equal_p (out, operands[0]))
12760 emit_move_insn (operands[0], copy_rtx (out));
12762 return 1; /* DONE */
12766 * General case: Jumpful:
12767 * xorl dest,dest cmpl op1, op2
12768 * cmpl op1, op2 movl ct, dest
12769 * setcc dest jcc 1f
12770 * decl dest movl cf, dest
12771 * andl (cf-ct),dest 1:
12774 * Size 20. Size 14.
12776 * This is reasonably steep, but branch mispredict costs are
12777 * high on modern cpus, so consider failing only if optimizing
12781 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12782 && BRANCH_COST >= 2)
12786 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12791 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12793 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12795 /* We may be reversing unordered compare to normal compare,
12796 that is not valid in general (we may convert non-trapping
12797 condition to trapping one), however on i386 we currently
12798 emit all comparisons unordered. */
12799 code = reverse_condition_maybe_unordered (code);
12803 code = reverse_condition (code);
12804 if (compare_code != UNKNOWN)
12805 compare_code = reverse_condition (compare_code);
12809 if (compare_code != UNKNOWN)
12811 /* notl op1 (if needed)
12816 For x < 0 (resp. x <= -1) there will be no notl,
12817 so if possible swap the constants to get rid of the
12819 True/false will be -1/0 while code below (store flag
12820 followed by decrement) is 0/-1, so the constants need
12821 to be exchanged once more. */
12823 if (compare_code == GE || !cf)
12825 code = reverse_condition (code);
12830 HOST_WIDE_INT tmp = cf;
12835 out = emit_store_flag (out, code, ix86_compare_op0,
12836 ix86_compare_op1, VOIDmode, 0, -1);
12840 out = emit_store_flag (out, code, ix86_compare_op0,
12841 ix86_compare_op1, VOIDmode, 0, 1);
12843 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12844 copy_rtx (out), 1, OPTAB_DIRECT);
12847 out = expand_simple_binop (mode, AND, copy_rtx (out),
12848 gen_int_mode (cf - ct, mode),
12849 copy_rtx (out), 1, OPTAB_DIRECT);
12851 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12852 copy_rtx (out), 1, OPTAB_DIRECT);
12853 if (!rtx_equal_p (out, operands[0]))
12854 emit_move_insn (operands[0], copy_rtx (out));
12856 return 1; /* DONE */
12860 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12862 /* Try a few things more with specific constants and a variable. */
12865 rtx var, orig_out, out, tmp;
12867 if (BRANCH_COST <= 2)
12868 return 0; /* FAIL */
12870 /* If one of the two operands is an interesting constant, load a
12871 constant with the above and mask it in with a logical operation. */
12873 if (CONST_INT_P (operands[2]))
12876 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12877 operands[3] = constm1_rtx, op = and_optab;
12878 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12879 operands[3] = const0_rtx, op = ior_optab;
12881 return 0; /* FAIL */
12883 else if (CONST_INT_P (operands[3]))
12886 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12887 operands[2] = constm1_rtx, op = and_optab;
12888 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12889 operands[2] = const0_rtx, op = ior_optab;
12891 return 0; /* FAIL */
12894 return 0; /* FAIL */
12896 orig_out = operands[0];
12897 tmp = gen_reg_rtx (mode);
12900 /* Recurse to get the constant loaded. */
12901 if (ix86_expand_int_movcc (operands) == 0)
12902 return 0; /* FAIL */
12904 /* Mask in the interesting variable. */
12905 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12907 if (!rtx_equal_p (out, orig_out))
12908 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12910 return 1; /* DONE */
12914 * For comparison with above,
12924 if (! nonimmediate_operand (operands[2], mode))
12925 operands[2] = force_reg (mode, operands[2]);
12926 if (! nonimmediate_operand (operands[3], mode))
12927 operands[3] = force_reg (mode, operands[3]);
12929 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12931 rtx tmp = gen_reg_rtx (mode);
12932 emit_move_insn (tmp, operands[3]);
12935 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12937 rtx tmp = gen_reg_rtx (mode);
12938 emit_move_insn (tmp, operands[2]);
12942 if (! register_operand (operands[2], VOIDmode)
12944 || ! register_operand (operands[3], VOIDmode)))
12945 operands[2] = force_reg (mode, operands[2]);
12948 && ! register_operand (operands[3], VOIDmode))
12949 operands[3] = force_reg (mode, operands[3]);
12951 emit_insn (compare_seq);
12952 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12953 gen_rtx_IF_THEN_ELSE (mode,
12954 compare_op, operands[2],
12957 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12958 gen_rtx_IF_THEN_ELSE (mode,
12960 copy_rtx (operands[3]),
12961 copy_rtx (operands[0]))));
12963 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12964 gen_rtx_IF_THEN_ELSE (mode,
12966 copy_rtx (operands[2]),
12967 copy_rtx (operands[0]))));
12969 return 1; /* DONE */
12972 /* Swap, force into registers, or otherwise massage the two operands
12973 to an sse comparison with a mask result. Thus we differ a bit from
12974 ix86_prepare_fp_compare_args which expects to produce a flags result.
12976 The DEST operand exists to help determine whether to commute commutative
12977 operators. The POP0/POP1 operands are updated in place. The new
12978 comparison code is returned, or UNKNOWN if not implementable. */
12980 static enum rtx_code
12981 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12982 rtx *pop0, rtx *pop1)
/* NOTE(review): this excerpt is elided -- the switch dispatching on CODE
   and the final return are not fully visible here; comments below annotate
   only the visible fragments.  */
12990 /* We have no LTGT as an operator. We could implement it with
12991 NE & ORDERED, but this requires an extra temporary. It's
12992 not clear that it's worth it. */
12999 /* These are supported directly. */
13006 /* For commutative operators, try to canonicalize the destination
13007 operand to be first in the comparison - this helps reload to
13008 avoid extra moves. */
13009 if (!dest || !rtx_equal_p (dest, *pop1))
13017 /* These are not supported directly. Swap the comparison operands
13018 to transform into something that is supported. */
13022 code = swap_condition (code);
/* Any comparison code not handled above cannot occur here.  */
13026 gcc_unreachable ();
13032 /* Detect conditional moves that exactly match min/max operational
13033 semantics. Note that this is IEEE safe, as long as we don't
13034 interchange the operands.
13036 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13037 and TRUE if the operation is successful and instructions are emitted. */
13040 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13041 rtx cmp_op1, rtx if_true, rtx if_false)
/* NOTE(review): excerpt is elided -- declarations of is_min/tmp/v and
   several branches are not visible in this chunk.  */
13043 enum machine_mode mode;
13049 else if (code == UNGE)
/* For UNGE the true/false arms are exchanged so the compare can be
   treated as its ordered counterpart.  */
13052 if_true = if_false;
/* Match operand patterns: (a < b ? a : b) is a MIN, (a < b ? b : a)
   a MAX; anything else is not a min/max and we bail out.  */
13058 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13060 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13065 mode = GET_MODE (dest);
13067 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13068 but MODE may be a vector mode and thus not appropriate. */
13069 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-safe path: wrap the operands in an UNSPEC_IEEE_MIN/MAX so the
   operand order (and thus NaN/signed-zero behavior) is preserved.  */
13071 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13074 if_true = force_reg (mode, if_true);
13075 v = gen_rtvec (2, if_true, if_false);
13076 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX rtx is sufficient.  */
13080 code = is_min ? SMIN : SMAX;
13081 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13084 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13088 /* Expand an sse vector comparison. Return the register with the result. */
13091 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13092 rtx op_true, rtx op_false)
13094 enum machine_mode mode = GET_MODE (dest);
/* First compare operand must be a register; the second may be a
   register or memory (nonimmediate).  */
13097 cmp_op0 = force_reg (mode, cmp_op0);
13098 if (!nonimmediate_operand (cmp_op1, mode))
13099 cmp_op1 = force_reg (mode, cmp_op1);
/* If DEST overlaps either arm of the conditional move, compute the
   mask into a fresh register instead so the arms stay live.
   NOTE(review): the leading condition of this test is elided here.  */
13102 || reg_overlap_mentioned_p (dest, op_true)
13103 || reg_overlap_mentioned_p (dest, op_false))
13104 dest = gen_reg_rtx (mode);
/* Emit DEST = (CODE cmp_op0 cmp_op1), producing the all-ones/all-zeros
   SSE comparison mask.  */
13106 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13107 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13112 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13113 operations. This is used for both scalar and vector conditional moves. */
13116 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13118 enum machine_mode mode = GET_MODE (dest);
/* NOTE(review): excerpt is elided -- the condition guarding this pcmov
   branch (presumably an SSE5/cmov-capable target check -- verify) is not
   visible here.  */
13123 rtx pcmov = gen_rtx_SET (mode, dest,
13124 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* False arm is zero: DEST = CMP & OP_TRUE suffices.  */
13129 else if (op_false == CONST0_RTX (mode))
13131 op_true = force_reg (mode, op_true);
13132 x = gen_rtx_AND (mode, cmp, op_true);
13133 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* True arm is zero: DEST = ~CMP & OP_FALSE suffices.  */
13135 else if (op_true == CONST0_RTX (mode))
13137 op_false = force_reg (mode, op_false);
13138 x = gen_rtx_NOT (mode, cmp);
13139 x = gen_rtx_AND (mode, x, op_false);
13140 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* General case: DEST = (OP_TRUE & CMP) | (OP_FALSE & ~CMP), built in
   two temporaries and OR'd together.  */
13144 op_true = force_reg (mode, op_true);
13145 op_false = force_reg (mode, op_false);
13147 t2 = gen_reg_rtx (mode);
13149 t3 = gen_reg_rtx (mode);
13153 x = gen_rtx_AND (mode, op_true, cmp);
13154 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13156 x = gen_rtx_NOT (mode, cmp);
13157 x = gen_rtx_AND (mode, x, op_false);
13158 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13160 x = gen_rtx_IOR (mode, t3, t2);
13161 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13165 /* Expand a floating-point conditional move. Return true if successful. */
13168 ix86_expand_fp_movcc (rtx operands[])
13170 enum machine_mode mode = GET_MODE (operands[0]);
13171 enum rtx_code code = GET_CODE (operands[1]);
13172 rtx tmp, compare_op, second_test, bypass_test;
/* SSE scalar-FP path: use mask-and-blend logic instead of fcmov.  */
13174 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13176 enum machine_mode cmode;
13178 /* Since we've no cmove for sse registers, don't force bad register
13179 allocation just to gain access to it. Deny movcc when the
13180 comparison mode doesn't match the move mode. */
13181 cmode = GET_MODE (ix86_compare_op0);
13182 if (cmode == VOIDmode)
13183 cmode = GET_MODE (ix86_compare_op1);
13187 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13189 &ix86_compare_op1);
13190 if (code == UNKNOWN)
/* Try the min/max special case first; fall back to generic
   compare + mask-blend expansion.  */
13193 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13194 ix86_compare_op1, operands[2],
13198 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13199 ix86_compare_op1, operands[2], operands[3]);
13200 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13204 /* The floating point conditional move instructions don't directly
13205 support conditions resulting from a signed integer comparison. */
13207 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13209 /* The floating point conditional move instructions don't directly
13210 support signed integer comparisons. */
13212 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition with setcc into a QImode temp and
   redo the compare against zero, which fcmov does support.  */
13214 gcc_assert (!second_test && !bypass_test);
13215 tmp = gen_reg_rtx (QImode);
13216 ix86_expand_setcc (code, tmp);
13218 ix86_compare_op0 = tmp;
13219 ix86_compare_op1 = const0_rtx;
13220 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* If the destination overlaps an arm that a follow-up conditional
   move would read, copy that arm into a fresh register first.  */
13222 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13224 tmp = gen_reg_rtx (mode);
13225 emit_move_insn (tmp, operands[3]);
13228 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13230 tmp = gen_reg_rtx (mode);
13231 emit_move_insn (tmp, operands[2]);
/* Emit the primary fcmov, then fix-up moves for the bypass and
   second tests produced by unordered FP comparisons.  */
13235 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13236 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13237 operands[2], operands[3])));
13239 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13240 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13241 operands[3], operands[0])));
13243 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13244 gen_rtx_IF_THEN_ELSE (mode, second_test,
13245 operands[2], operands[0])));
13250 /* Expand a floating-point vector conditional move; a vcond operation
13251 rather than a movcc operation. */
13254 ix86_expand_fp_vcond (rtx operands[])
13256 enum rtx_code code = GET_CODE (operands[3]);
/* Canonicalize the comparison into something the SSE compare insns
   implement; bail out if that is not possible.  */
13259 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13260 &operands[4], &operands[5]);
13261 if (code == UNKNOWN)
/* Min/max special case, then the generic compare + mask-blend path --
   same structure as the scalar ix86_expand_fp_movcc above.  */
13264 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13265 operands[5], operands[1], operands[2]))
13268 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13269 operands[1], operands[2]);
13270 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13274 /* Expand a signed/unsigned integral vector conditional move. */
13277 ix86_expand_int_vcond (rtx operands[])
13279 enum machine_mode mode = GET_MODE (operands[0]);
13280 enum rtx_code code = GET_CODE (operands[3]);
/* When set, the true/false arms are exchanged at the end instead of
   implementing the reversed comparison directly.  */
13281 bool negate = false;
13284 cop0 = operands[4];
13285 cop1 = operands[5];
13287 /* Canonicalize the comparison to EQ, GT, GTU. */
13298 code = reverse_condition (code);
13304 code = reverse_condition (code);
13310 code = swap_condition (code);
13311 x = cop0, cop0 = cop1, cop1 = x;
13315 gcc_unreachable ();
13318 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13319 if (mode == V2DImode)
13324 /* SSE4.1 supports EQ. */
13325 if (!TARGET_SSE4_1)
13331 /* SSE4.2 supports GT/GTU. */
13332 if (!TARGET_SSE4_2)
13337 gcc_unreachable ();
13341 /* Unsigned parallel compare is not supported by the hardware. Play some
13342 tricks to turn this into a signed comparison against 0. */
13345 cop0 = force_reg (mode, cop0);
13354 /* Perform a parallel modulo subtraction. */
13355 t1 = gen_reg_rtx (mode);
13356 emit_insn ((mode == V4SImode
13358 : gen_subv2di3) (t1, cop0, cop1));
13360 /* Extract the original sign bit of op0. */
13361 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13363 t2 = gen_reg_rtx (mode);
13364 emit_insn ((mode == V4SImode
13366 : gen_andv2di3) (t2, cop0, mask));
13368 /* XOR it back into the result of the subtraction. This results
13369 in the sign bit set iff we saw unsigned underflow. */
13370 x = gen_reg_rtx (mode);
13371 emit_insn ((mode == V4SImode
13373 : gen_xorv2di3) (x, t1, t2));
13381 /* Perform a parallel unsigned saturating subtraction. */
13382 x = gen_reg_rtx (mode);
13383 emit_insn (gen_rtx_SET (VOIDmode, x,
13384 gen_rtx_US_MINUS (mode, cop0, cop1)));
13391 gcc_unreachable ();
/* After the unsigned trick, compare the transformed value to zero.  */
13395 cop1 = CONST0_RTX (mode);
/* NEGATE selects the swapped arms: operands[1+negate]/operands[2-negate]
   are (true,false) normally and (false,true) when negate is set.  */
13398 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13399 operands[1+negate], operands[2-negate]);
13401 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13402 operands[2-negate]);
13406 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13407 true if we should do zero extension, else sign extension. HIGH_P is
13408 true if we want the N/2 high elements, else the low elements. */
13411 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13413 enum machine_mode imode = GET_MODE (operands[1]);
13414 rtx (*unpack)(rtx, rtx, rtx);
/* Pick the interleave insn matching the source element width and the
   requested half.  NOTE(review): the switch on IMODE is elided here.  */
13421 unpack = gen_vec_interleave_highv16qi;
13423 unpack = gen_vec_interleave_lowv16qi;
13427 unpack = gen_vec_interleave_highv8hi;
13429 unpack = gen_vec_interleave_lowv8hi;
13433 unpack = gen_vec_interleave_highv4si;
13435 unpack = gen_vec_interleave_lowv4si;
13438 gcc_unreachable ();
13441 dest = gen_lowpart (imode, operands[0]);
/* The second interleave operand supplies the extension bits: a zero
   vector for zero-extension, or a sign mask (operand > 0 compare, giving
   all-ones lanes where operand[1] is negative) for sign-extension.  */
13444 se = force_reg (imode, CONST0_RTX (imode));
13446 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13447 operands[1], pc_rtx, pc_rtx);
13449 emit_insn (unpack (dest, operands[1], se));
13452 /* This function performs the same task as ix86_expand_sse_unpack,
13453 but with SSE4.1 instructions. */
13456 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13458 enum machine_mode imode = GET_MODE (operands[1]);
13459 rtx (*unpack)(rtx, rtx);
/* Select the SSE4.1 pmovzx/pmovsx-style extension insn for the source
   element width.  NOTE(review): the switch on IMODE is elided here.  */
13466 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13468 unpack = gen_sse4_1_extendv8qiv8hi2;
13472 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13474 unpack = gen_sse4_1_extendv4hiv4si2;
13478 unpack = gen_sse4_1_zero_extendv2siv2di2;
13480 unpack = gen_sse4_1_extendv2siv2di2;
13483 gcc_unreachable ();
13486 dest = operands[0];
/* The extension insns read the low half; for HIGH_P shift the high
   8 bytes down first.  */
13489 /* Shift higher 8 bytes to lower 8 bytes. */
13490 src = gen_reg_rtx (imode);
13491 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13492 gen_lowpart (TImode, operands[1]),
13498 emit_insn (unpack (dest, src));
13501 /* This function performs the same task as ix86_expand_sse_unpack,
13502 but with amdfam15 instructions. */
/* Byte-selector encodings for the SSE5 PPERM instruction: the high bits
   of each selector byte choose the operation applied to the source byte,
   the SRC1/SRC2 bit chooses which input vector it comes from.  */
13504 #define PPERM_SRC 0x00 /* copy source */
13505 #define PPERM_INVERT 0x20 /* invert source */
13506 #define PPERM_REVERSE 0x40 /* bit reverse source */
13507 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13508 #define PPERM_ZERO 0x80 /* all 0's */
13509 #define PPERM_ONES 0xa0 /* all 1's */
13510 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13511 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13513 #define PPERM_SRC1 0x00 /* use first source byte */
13514 #define PPERM_SRC2 0x10 /* use second source byte */
13517 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13519 enum machine_mode imode = GET_MODE (operands[1]);
/* Selector bytes for the 16-byte PPERM control vector.  */
13520 int pperm_bytes[16];
/* H is the byte offset of the half being widened (8 = high half).  */
13522 int h = (high_p) ? 8 : 0;
13525 rtvec v = rtvec_alloc (16);
13528 rtx op0 = operands[0], op1 = operands[1];
/* NOTE(review): the switch on IMODE is elided; the three visible arms
   below handle V16QI->V8HI, V8HI->V4SI and V4SI->V2DI widening.  */
13533 vs = rtvec_alloc (8);
13534 h2 = (high_p) ? 8 : 0;
/* Each source byte is followed by one extension byte: zero for
   unsigned (the elided arm -- verify), sign-propagated otherwise.  */
13535 for (i = 0; i < 8; i++)
13537 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13538 pperm_bytes[2*i+1] = ((unsigned_p)
13540 : PPERM_SIGN | PPERM_SRC2 | i | h);
13543 for (i = 0; i < 16; i++)
13544 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13546 for (i = 0; i < 8; i++)
13547 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13549 p = gen_rtx_PARALLEL (VOIDmode, vs);
13550 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13552 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13554 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes followed by two extension bytes.  */
13558 vs = rtvec_alloc (4);
13559 h2 = (high_p) ? 4 : 0;
13560 for (i = 0; i < 4; i++)
13562 sign_extend = ((unsigned_p)
13564 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13565 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13566 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13567 pperm_bytes[4*i+2] = sign_extend;
13568 pperm_bytes[4*i+3] = sign_extend;
13571 for (i = 0; i < 16; i++)
13572 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13574 for (i = 0; i < 4; i++)
13575 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13577 p = gen_rtx_PARALLEL (VOIDmode, vs);
13578 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13580 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13582 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes followed by four extension bytes.  */
13586 vs = rtvec_alloc (2);
13587 h2 = (high_p) ? 2 : 0;
13588 for (i = 0; i < 2; i++)
13590 sign_extend = ((unsigned_p)
13592 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13593 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13594 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13595 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13596 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13597 pperm_bytes[8*i+4] = sign_extend;
13598 pperm_bytes[8*i+5] = sign_extend;
13599 pperm_bytes[8*i+6] = sign_extend;
13600 pperm_bytes[8*i+7] = sign_extend;
13603 for (i = 0; i < 16; i++)
13604 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13606 for (i = 0; i < 2; i++)
13607 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13609 p = gen_rtx_PARALLEL (VOIDmode, vs);
13610 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13612 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13614 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13618 gcc_unreachable ();
13624 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13625 next narrower integer vector type */
13627 ix86_expand_sse5_pack (rtx operands[3])
13629 enum machine_mode imode = GET_MODE (operands[0]);
/* PPERM selector bytes; the low 8 pull from SRC1 (operands[1]) and the
   high 8 from SRC2 (operands[2]).  */
13630 int pperm_bytes[16];
13632 rtvec v = rtvec_alloc (16);
13634 rtx op0 = operands[0];
13635 rtx op1 = operands[1];
13636 rtx op2 = operands[2];
/* NOTE(review): the switch on IMODE is elided; the three visible arms
   narrow V8HI->V16QI, V4SI->V8HI and V2DI->V4SI respectively by taking
   the low bytes of each wider element.  */
13641 for (i = 0; i < 8; i++)
13643 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13644 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13647 for (i = 0; i < 16; i++)
13648 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13650 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13651 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13655 for (i = 0; i < 4; i++)
13657 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13658 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13659 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13660 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13663 for (i = 0; i < 16; i++)
13664 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13666 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13667 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13671 for (i = 0; i < 2; i++)
13673 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13674 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13675 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13676 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13677 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13678 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13679 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13680 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13683 for (i = 0; i < 16; i++)
13684 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13686 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13687 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13691 gcc_unreachable ();
13697 /* Expand conditional increment or decrement using adb/sbb instructions.
13698 The default case using setcc followed by the conditional move can be
13699 done by generic code. */
13701 ix86_expand_int_addcc (rtx operands[])
13703 enum rtx_code code = GET_CODE (operands[1]);
13705 rtx val = const0_rtx;
13706 bool fpcmp = false;
13707 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +/-1 increments are handled here; everything else is left to
   the generic expander.  */
13709 if (operands[3] != const1_rtx
13710 && operands[3] != constm1_rtx)
/* The comparison must be expressible through the carry flag.  */
13712 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13713 ix86_compare_op1, &compare_op))
13715 code = GET_CODE (compare_op);
13717 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13718 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13721 code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition in place; FP comparisons need the
   maybe-unordered variant.  NOTE(review): the guarding condition for
   this reversal is elided here.  */
13728 PUT_CODE (compare_op,
13729 reverse_condition_maybe_unordered
13730 (GET_CODE (compare_op)));
13732 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13734 PUT_MODE (compare_op, mode);
13736 /* Construct either adc or sbb insn. */
13737 if ((code == LTU) == (operands[3] == constm1_rtx))
13739 switch (GET_MODE (operands[0]))
13742 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13745 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13748 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13751 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13754 gcc_unreachable ();
13759 switch (GET_MODE (operands[0]))
13762 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13765 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13768 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13771 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13774 gcc_unreachable ();
13777 return 1; /* DONE */
13781 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13782 works for floating pointer parameters and nonoffsetable memories.
13783 For pushes, it returns just stack offsets; the values will be saved
13784 in the right order. Maximally three parts are generated. */
13787 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Number of word-sized parts: 32-bit targets split into 4-byte parts
   (XFmode is special-cased to 3), 64-bit targets into 8-byte parts.
   NOTE(review): the TARGET_64BIT guard between these two computations
   is elided here.  */
13792 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13794 size = (GET_MODE_SIZE (mode) + 4) / 8;
13796 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13797 gcc_assert (size >= 2 && size <= 3);
13799 /* Optimize constant pool reference to immediates. This is used by fp
13800 moves, that force all constants to memory to allow combining. */
13801 if (MEM_P (operand) && MEM_READONLY_P (operand))
13803 rtx tmp = maybe_get_pool_constant (operand);
13808 if (MEM_P (operand) && !offsettable_memref_p (operand))
13810 /* The only non-offsetable memories we handle are pushes. */
13811 int ok = push_operand (operand, VOIDmode);
/* For pushes just return the same operand for every part; the caller
   emits them in the right order.  */
13815 operand = copy_rtx (operand);
13816 PUT_MODE (operand, Pmode);
13817 parts[0] = parts[1] = parts[2] = operand;
13821 if (GET_CODE (operand) == CONST_VECTOR)
13823 enum machine_mode imode = int_mode_for_mode (mode);
13824 /* Caution: if we looked through a constant pool memory above,
13825 the operand may actually have a different mode now. That's
13826 ok, since we want to pun this all the way back to an integer. */
13827 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13828 gcc_assert (operand != NULL);
/* 32-bit path: split into SImode parts.  NOTE(review): the
   !TARGET_64BIT guard around this section is elided.  */
13834 if (mode == DImode)
13835 split_di (&operand, 1, &parts[0], &parts[1]);
13838 if (REG_P (operand))
/* Hard registers can only be split after reload, when register
   numbers are final.  */
13840 gcc_assert (reload_completed);
13841 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13842 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13844 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13846 else if (offsettable_memref_p (operand))
13848 operand = adjust_address (operand, SImode, 0);
13849 parts[0] = operand;
13850 parts[1] = adjust_address (operand, SImode, 4);
13852 parts[2] = adjust_address (operand, SImode, 8);
13854 else if (GET_CODE (operand) == CONST_DOUBLE)
/* FP constant: convert to target long/double image words.  */
13859 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13863 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13864 parts[2] = gen_int_mode (l[2], SImode);
13867 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13870 gcc_unreachable ();
13872 parts[1] = gen_int_mode (l[1], SImode);
13873 parts[0] = gen_int_mode (l[0], SImode);
13876 gcc_unreachable ();
/* 64-bit path: split into DImode (and for XFmode an SImode upper)
   parts.  */
13881 if (mode == TImode)
13882 split_ti (&operand, 1, &parts[0], &parts[1]);
13883 if (mode == XFmode || mode == TFmode)
13885 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13886 if (REG_P (operand))
13888 gcc_assert (reload_completed);
13889 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13890 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13892 else if (offsettable_memref_p (operand))
13894 operand = adjust_address (operand, DImode, 0);
13895 parts[0] = operand;
13896 parts[1] = adjust_address (operand, upper_mode, 8);
13898 else if (GET_CODE (operand) == CONST_DOUBLE)
13903 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13904 real_to_target (l, &r, mode);
13906 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13907 if (HOST_BITS_PER_WIDE_INT >= 64)
13910 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13911 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13914 parts[0] = immed_double_const (l[0], l[1], DImode);
13916 if (upper_mode == SImode)
13917 parts[1] = gen_int_mode (l[2], SImode);
13918 else if (HOST_BITS_PER_WIDE_INT >= 64)
13921 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13922 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13925 parts[1] = immed_double_const (l[2], l[3], DImode);
13928 gcc_unreachable ();
13935 /* Emit insns to perform a move or push of DI, DF, and XF values.
13936 Return false when normal moves are needed; true when all required
13937 insns have been emitted. Operands 2-4 contain the input values
13938 in the correct order; operands 5-7 contain the output values. */
13941 ix86_split_long_move (rtx operands[])
13946 int collisions = 0;
13947 enum machine_mode mode = GET_MODE (operands[0]);
13949 /* The DFmode expanders may ask us to move double.
13950 For 64bit target this is single move. By hiding the fact
13951 here we simplify i386.md splitters. */
13952 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13954 /* Optimize constant pool reference to immediates. This is used by
13955 fp moves, that force all constants to memory to allow combining. */
13957 if (MEM_P (operands[1])
13958 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13959 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13960 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13961 if (push_operand (operands[0], VOIDmode))
13963 operands[0] = copy_rtx (operands[0]);
13964 PUT_MODE (operands[0], Pmode);
13967 operands[0] = gen_lowpart (DImode, operands[0]);
13968 operands[1] = gen_lowpart (DImode, operands[1]);
/* Single DImode move suffices on 64-bit targets; done.  */
13969 emit_move_insn (operands[0], operands[1]);
13973 /* The only non-offsettable memory we handle is push. */
13974 if (push_operand (operands[0], VOIDmode))
13977 gcc_assert (!MEM_P (operands[0])
13978 || offsettable_memref_p (operands[0]));
/* Split source and destination into word-sized parts; nparts is
   the number of parts (ix86_split_to_parts defines the layout).  */
13980 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13981 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13983 /* When emitting push, take care for source operands on the stack. */
13984 if (push && MEM_P (operands[1])
13985 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13988 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13989 XEXP (part[1][2], 0))
13990 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13991 XEXP (part[1][1], 0));
13994 /* We need to do copy in the right order in case an address register
13995 of the source overlaps the destination. */
13996 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Count how many destination registers appear in the source address.  */
13998 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14000 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14003 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14006 /* Collision in the middle part can be handled by reordering. */
14007 if (collisions == 1 && nparts == 3
14008 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
/* Swap middle and last parts of both source and destination.  */
14011 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14012 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14015 /* If there are more collisions, we can't handle it by reordering.
14016 Do an lea to the last part and use only one colliding move. */
14017 else if (collisions > 1)
14023 base = part[0][nparts - 1];
14025 /* Handle the case when the last part isn't valid for lea.
14026 Happens in 64-bit mode storing the 12-byte XFmode. */
14027 if (GET_MODE (base) != Pmode)
14028 base = gen_rtx_REG (Pmode, REGNO (base));
/* Load the source address into BASE, then re-address all source
   parts as BASE + constant offsets.  */
14030 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14031 part[1][0] = replace_equiv_address (part[1][0], base);
14032 part[1][1] = replace_equiv_address (part[1][1],
14033 plus_constant (base, UNITS_PER_WORD));
14035 part[1][2] = replace_equiv_address (part[1][2],
14036 plus_constant (base, 8));
14046 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
/* Pre-adjust the stack pointer to keep the 16-byte slot layout.  */
14047 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14048 emit_move_insn (part[0][2], part[1][2]);
14053 /* In 64bit mode we don't have 32bit push available. In case this is
14054 register, it is OK - we will just use larger counterpart. We also
14055 retype memory - these come from an attempt to avoid REX prefix on
14056 moving of second half of TFmode value. */
14057 if (GET_MODE (part[1][1]) == SImode)
14059 switch (GET_CODE (part[1][1]))
14062 part[1][1] = adjust_address (part[1][1], DImode, 0);
14066 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14070 gcc_unreachable ();
14073 if (GET_MODE (part[1][0]) == SImode)
14074 part[1][0] = part[1][1];
14077 emit_move_insn (part[0][1], part[1][1]);
14078 emit_move_insn (part[0][0], part[1][0]);
14082 /* Choose correct order to not overwrite the source before it is copied. */
14083 if ((REG_P (part[0][0])
14084 && REG_P (part[1][1])
14085 && (REGNO (part[0][0]) == REGNO (part[1][1])
14087 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14089 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy highest part first (reverse order), 3 parts.  */
14093 operands[2] = part[0][2];
14094 operands[3] = part[0][1];
14095 operands[4] = part[0][0];
14096 operands[5] = part[1][2];
14097 operands[6] = part[1][1];
14098 operands[7] = part[1][0];
/* Reverse order, 2 parts.  */
14102 operands[2] = part[0][1];
14103 operands[3] = part[0][0];
14104 operands[5] = part[1][1];
14105 operands[6] = part[1][0];
/* Forward order, 3 parts.  */
14112 operands[2] = part[0][0];
14113 operands[3] = part[0][1];
14114 operands[4] = part[0][2];
14115 operands[5] = part[1][0];
14116 operands[6] = part[1][1];
14117 operands[7] = part[1][2];
/* Forward order, 2 parts.  */
14121 operands[2] = part[0][0];
14122 operands[3] = part[0][1];
14123 operands[5] = part[1][0];
14124 operands[6] = part[1][1];
14128 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14131 if (CONST_INT_P (operands[5])
14132 && operands[5] != const0_rtx
14133 && REG_P (operands[2]))
/* Reuse the register already holding the constant instead of
   materializing the same immediate again.  */
14135 if (CONST_INT_P (operands[6])
14136 && INTVAL (operands[6]) == INTVAL (operands[5]))
14137 operands[6] = operands[2];
14140 && CONST_INT_P (operands[7])
14141 && INTVAL (operands[7]) == INTVAL (operands[5]))
14142 operands[7] = operands[2];
14146 && CONST_INT_P (operands[6])
14147 && operands[6] != const0_rtx
14148 && REG_P (operands[3])
14149 && CONST_INT_P (operands[7])
14150 && INTVAL (operands[7]) == INTVAL (operands[6]))
14151 operands[7] = operands[3];
/* Finally emit the part-wise moves in the chosen order.  */
14154 emit_move_insn (operands[2], operands[5]);
14155 emit_move_insn (operands[3], operands[6]);
14157 emit_move_insn (operands[4], operands[7]);
14162 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14163 left shift by a constant, either using a single shift or
14164 a sequence of add instructions. */
14167 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* A single self-add doubles OPERAND, i.e. shifts left by one.  */
14171 emit_insn ((mode == DImode
14173 : gen_adddi3) (operand, operand, operand));
/* Use repeated adds only when not optimizing for size and the add
   sequence is no more costly than one constant shift.  */
14175 else if (!optimize_size
14176 && count * ix86_cost->add <= ix86_cost->shift_const)
14179 for (i=0; i<count; i++)
14181 emit_insn ((mode == DImode
14183 : gen_adddi3) (operand, operand, operand));
/* Otherwise emit one shift-by-COUNT instruction.  */
14187 emit_insn ((mode == DImode
14189 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word (DImode on 32-bit, TImode on 64-bit) left shift
   of OPERANDS into single-word operations.  SCRATCH, when available
   together with cmove, avoids a conditional branch in the variable-count
   case.  */
14193 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14195 rtx low[2], high[2];
/* Width of one half of the double-word value.  */
14197 const int single_width = mode == DImode ? 32 : 64;
14199 if (CONST_INT_P (operands[2]))
14201 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14202 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shifting by a whole word or more: low word becomes zero and the
   old low word (shifted by the remainder) becomes the high word.  */
14204 if (count >= single_width)
14206 emit_move_insn (high[0], low[1]);
14207 emit_move_insn (low[0], const0_rtx);
14209 if (count > single_width)
14210 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Sub-word constant shift: shld feeds low bits into high, then the
   low word is shifted on its own.  */
14214 if (!rtx_equal_p (operands[0], operands[1]))
14215 emit_move_insn (operands[0], operands[1]);
14216 emit_insn ((mode == DImode
14218 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14219 ix86_expand_ashl_const (low[0], count, mode);
14224 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14226 if (operands[1] == const1_rtx)
14228 /* Assuming we've chosen QImode-capable registers, then 1 << N
14229 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14230 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14232 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Clear both halves, test bit log2(single_width) of the count, and
   set exactly one of the low bytes from the resulting ZF.  */
14234 ix86_expand_clear (low[0]);
14235 ix86_expand_clear (high[0]);
14236 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14238 d = gen_lowpart (QImode, low[0]);
14239 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14240 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14241 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14243 d = gen_lowpart (QImode, high[0]);
14244 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14245 s = gen_rtx_NE (QImode, flags, const0_rtx);
14246 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14249 /* Otherwise, we can get the same results by manually performing
14250 a bit extract operation on bit 5/6, and then performing the two
14251 shifts. The two methods of getting 0/1 into low/high are exactly
14252 the same size. Avoiding the shift in the bit extract case helps
14253 pentium4 a bit; no one else seems to care much either way. */
14258 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14259 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14261 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14262 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / bit 6 (TImode) of the count into high,
   then low = high ^ 1, so exactly one half starts as 1.  */
14264 emit_insn ((mode == DImode
14266 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14267 emit_insn ((mode == DImode
14269 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14270 emit_move_insn (low[0], high[0]);
14271 emit_insn ((mode == DImode
14273 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14276 emit_insn ((mode == DImode
14278 : gen_ashldi3) (low[0], low[0], operands[2]));
14279 emit_insn ((mode == DImode
14281 : gen_ashldi3) (high[0], high[0], operands[2]));
14285 if (operands[1] == constm1_rtx)
14287 /* For -1 << N, we can avoid the shld instruction, because we
14288 know that we're shifting 0...31/63 ones into a -1. */
14289 emit_move_insn (low[0], constm1_rtx);
14291 emit_move_insn (high[0], low[0]);
14293 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld plus a plain shift of the low
   word, then fix up when the count is >= single_width.  */
14297 if (!rtx_equal_p (operands[0], operands[1]))
14298 emit_move_insn (operands[0], operands[1]);
14300 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14301 emit_insn ((mode == DImode
14303 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14306 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* With cmove and a scratch register the >= single_width adjustment
   is branch-free; otherwise a conditional-jump pattern is used.  */
14308 if (TARGET_CMOVE && scratch)
14310 ix86_expand_clear (scratch);
14311 emit_insn ((mode == DImode
14312 ? gen_x86_shift_adj_1
14313 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14316 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift of OPERANDS into
   single-word operations; counterpart of ix86_split_ashl.  */
14320 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14322 rtx low[2], high[2];
14324 const int single_width = mode == DImode ? 32 : 64;
14326 if (CONST_INT_P (operands[2]))
14328 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14329 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximal shift: both halves become copies of the sign bit.  */
14331 if (count == single_width * 2 - 1)
14333 emit_move_insn (high[0], high[1]);
14334 emit_insn ((mode == DImode
14336 : gen_ashrdi3) (high[0], high[0],
14337 GEN_INT (single_width - 1)));
14338 emit_move_insn (low[0], high[0]);
/* Shift by a whole word or more: low word gets old high word
   (shifted by the remainder); high word becomes sign extension.  */
14341 else if (count >= single_width)
14343 emit_move_insn (low[0], high[1]);
14344 emit_move_insn (high[0], low[0]);
14345 emit_insn ((mode == DImode
14347 : gen_ashrdi3) (high[0], high[0],
14348 GEN_INT (single_width - 1)));
14349 if (count > single_width)
14350 emit_insn ((mode == DImode
14352 : gen_ashrdi3) (low[0], low[0],
14353 GEN_INT (count - single_width)));
/* Sub-word constant shift: shrd feeds high bits into low, then the
   high word is shifted arithmetically on its own.  */
14357 if (!rtx_equal_p (operands[0], operands[1]))
14358 emit_move_insn (operands[0], operands[1]);
14359 emit_insn ((mode == DImode
14361 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14362 emit_insn ((mode == DImode
14364 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + arithmetic shift, then adjust for counts
   >= single_width, branch-free when cmove and a scratch exist.  */
14369 if (!rtx_equal_p (operands[0], operands[1]))
14370 emit_move_insn (operands[0], operands[1]);
14372 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14374 emit_insn ((mode == DImode
14376 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14377 emit_insn ((mode == DImode
14379 : gen_ashrdi3) (high[0], high[0], operands[2]));
14381 if (TARGET_CMOVE && scratch)
/* Scratch holds the sign-extension word for the adjustment.  */
14383 emit_move_insn (scratch, high[0]);
14384 emit_insn ((mode == DImode
14386 : gen_ashrdi3) (scratch, scratch,
14387 GEN_INT (single_width - 1)));
14388 emit_insn ((mode == DImode
14389 ? gen_x86_shift_adj_1
14390 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14394 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift of OPERANDS into
   single-word operations; like ix86_split_ashr but zero-filling.  */
14399 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14401 rtx low[2], high[2];
14403 const int single_width = mode == DImode ? 32 : 64;
14405 if (CONST_INT_P (operands[2]))
14407 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14408 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by a whole word or more: high word becomes zero, low word
   gets old high word shifted by the remainder.  */
14410 if (count >= single_width)
14412 emit_move_insn (low[0], high[1]);
14413 ix86_expand_clear (high[0]);
14415 if (count > single_width)
14416 emit_insn ((mode == DImode
14418 : gen_lshrdi3) (low[0], low[0],
14419 GEN_INT (count - single_width)));
/* Sub-word constant shift: shrd into low, logical shift of high.  */
14423 if (!rtx_equal_p (operands[0], operands[1]))
14424 emit_move_insn (operands[0], operands[1]);
14425 emit_insn ((mode == DImode
14427 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14428 emit_insn ((mode == DImode
14430 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + logical shift, then the >= single_width
   fix-up, branch-free when cmove and a scratch register exist.  */
14435 if (!rtx_equal_p (operands[0], operands[1]))
14436 emit_move_insn (operands[0], operands[1]);
14438 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14440 emit_insn ((mode == DImode
14442 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14443 emit_insn ((mode == DImode
14445 : gen_lshrdi3) (high[0], high[0], operands[2]));
14447 /* Heh. By reversing the arguments, we can reuse this pattern. */
14448 if (TARGET_CMOVE && scratch)
14450 ix86_expand_clear (scratch);
14451 emit_insn ((mode == DImode
14452 ? gen_x86_shift_adj_1
14453 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14457 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14461 /* Predict just emitted jump instruction to be taken with probability PROB.
   PROB is on the REG_BR_PROB_BASE scale; the prediction is attached to the
   last emitted insn, which must be a jump, as a REG_BR_PROB note.  */
14463 predict_jump (int prob)
14465 rtx insn = get_last_insn ();
14466 gcc_assert (JUMP_P (insn));
14468 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14473 /* Helper function for the string operations below. Test whether VARIABLE
14474 is aligned to VALUE bytes. If true, jump to the label. */
14476 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14478 rtx label = gen_label_rtx ();
14479 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; jump to LABEL when the tested bits
   are clear (i.e. the alignment already holds).  */
14480 if (GET_MODE (variable) == DImode)
14481 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14483 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14484 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Epilogue tests are even odds; prologue alignment is usually met.  */
14487 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14489 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14493 /* Adjust COUNTER by the VALUE, i.e. emit COUNTREG -= VALUE using the
   add pattern matching the counter's mode. */
14495 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14497 if (GET_MODE (countreg) == DImode)
14498 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14500 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14503 /* Zero extend possibly SImode EXP to Pmode register.  Constants
   (VOIDmode) are simply forced into a Pmode register; Pmode values are
   copied; anything else is zero-extended into a fresh register. */
14505 ix86_zero_extend_to_Pmode (rtx exp)
14508 if (GET_MODE (exp) == VOIDmode)
14509 return force_reg (Pmode, exp);
14510 if (GET_MODE (exp) == Pmode)
14511 return copy_to_mode_reg (Pmode, exp);
14512 r = gen_reg_rtx (Pmode);
14513 emit_insn (gen_zero_extendsidi2 (r, exp));
14517 /* Divide COUNTREG by SCALE.  SCALE must be a power of two; a register
   count is divided with a logical right shift by log2(SCALE). */
14519 scale_counter (rtx countreg, int scale)
14522 rtx piece_size_mask;
/* Constant counts are folded at expand time.  */
14526 if (CONST_INT_P (countreg))
14527 return GEN_INT (INTVAL (countreg) / scale);
14528 gcc_assert (REG_P (countreg));
14530 piece_size_mask = GEN_INT (scale - 1);
14531 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14532 GEN_INT (exact_log2 (scale)),
14533 NULL, 1, OPTAB_DIRECT);
14537 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14538 DImode for constant loop counts: SImode is used unless the constant
   does not fit in 32 bits on a 64-bit target. */
14540 static enum machine_mode
14541 counter_mode (rtx count_exp)
14543 if (GET_MODE (count_exp) != VOIDmode)
14544 return GET_MODE (count_exp);
14545 if (GET_CODE (count_exp) != CONST_INT)
/* Constant with bits above the low 32 set needs DImode.  */
14547 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14552 /* When SRCPTR is non-NULL, output a simple loop to move memory
14553 from SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14554 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14555 equivalent loop to set memory by VALUE (supposed to be in MODE).
14557 The size is rounded down to whole number of chunk size moved at once.
14558 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14562 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14563 rtx destptr, rtx srcptr, rtx value,
14564 rtx count, enum machine_mode mode, int unroll,
14567 rtx out_label, top_label, iter, tmp;
14568 enum machine_mode iter_mode = counter_mode (count);
/* Bytes processed per loop iteration, and the mask that rounds COUNT
   down to a multiple of it.  */
14569 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14570 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14576 top_label = gen_label_rtx ();
14577 out_label = gen_label_rtx ();
14578 iter = gen_reg_rtx (iter_mode);
14580 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14581 NULL, 1, OPTAB_DIRECT);
14582 /* Those two should combine. */
14583 if (piece_size == const1_rtx)
/* Skip the whole loop when the rounded size is zero.  */
14585 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14587 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14589 emit_move_insn (iter, const0_rtx);
14591 emit_label (top_label);
/* Address each chunk as base pointer + iteration counter.  */
14593 tmp = convert_modes (Pmode, iter_mode, iter, true);
14594 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14595 destmem = change_address (destmem, mode, x_addr);
14599 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14600 srcmem = change_address (srcmem, mode, y_addr);
14602 /* When unrolling for chips that reorder memory reads and writes,
14603 we can save registers by using single temporary.
14604 Also using 4 temporaries is overkill in 32bit mode. */
14605 if (!TARGET_64BIT && 0)
14607 for (i = 0; i < unroll; i++)
14612 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14614 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14616 emit_move_insn (destmem, srcmem);
/* Default copy path: load all chunks into temporaries first, then
   store them, so loads and stores can be scheduled apart.  */
14622 gcc_assert (unroll <= 4);
14623 for (i = 0; i < unroll; i++)
14625 tmpreg[i] = gen_reg_rtx (mode);
14629 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14631 emit_move_insn (tmpreg[i], srcmem);
14633 for (i = 0; i < unroll; i++)
14638 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14640 emit_move_insn (destmem, tmpreg[i]);
/* Memset path (SRCPTR is NULL): store VALUE into each chunk.  */
14645 for (i = 0; i < unroll; i++)
14649 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14650 emit_move_insn (destmem, value);
14653 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14654 true, OPTAB_LIB_WIDEN);
14656 emit_move_insn (iter, tmp);
14658 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Predict the back-edge from the expected trip count if known.  */
14660 if (expected_size != -1)
14662 expected_size /= GET_MODE_SIZE (mode) * unroll;
14663 if (expected_size == 0)
14665 else if (expected_size > REG_BR_PROB_BASE)
14666 predict_jump (REG_BR_PROB_BASE - 1);
14668 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14671 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied region for the caller.  */
14672 iter = ix86_zero_extend_to_Pmode (iter);
14673 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14674 true, OPTAB_LIB_WIDEN);
14675 if (tmp != destptr)
14676 emit_move_insn (destptr, tmp);
14679 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14680 true, OPTAB_LIB_WIDEN);
14682 emit_move_insn (srcptr, tmp);
14684 emit_label (out_label);
14687 /* Output "rep; mov" instruction.
14688 Arguments have same meaning as for previous function */
14690 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14691 rtx destptr, rtx srcptr,
14693 enum machine_mode mode)
14699 /* If the size is known, it is shorter to use rep movs. */
14700 if (mode == QImode && CONST_INT_P (count)
14701 && !(INTVAL (count) & 3))
/* Re-express the MEMs in BLKmode at the pointer registers so the
   rep-mov pattern gets correct aliasing information.  */
14704 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14705 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14706 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14707 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* Count is in MODE-sized pieces, not bytes.  */
14708 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14709 if (mode != QImode)
/* Final pointer values: ptr + (countreg << log2(piece size)).  */
14711 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14712 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14713 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14714 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14715 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14716 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14720 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14721 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14723 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14727 /* Output "rep; stos" instruction.
14728 Arguments have same meaning as for previous function */
14730 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14732 enum machine_mode mode)
/* Re-express the destination MEM in BLKmode for correct aliasing.  */
14737 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14738 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* The stored value must live in a register of the piece mode.  */
14739 value = force_reg (mode, gen_lowpart (mode, value));
14740 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14741 if (mode != QImode)
/* Final destination pointer: ptr + (countreg << log2(piece size)).  */
14743 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14744 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14745 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14748 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14749 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one MODE-sized string-move from SRCMEM+OFFSET to DESTMEM+OFFSET,
   advancing DESTPTR and SRCPTR via the strmov pattern.  */
14753 emit_strmov (rtx destmem, rtx srcmem,
14754 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14756 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14757 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14758 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14761 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14763 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14764 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Known count: emit straight-line moves for each set bit of the
   residual byte count, largest pieces first.  */
14767 if (CONST_INT_P (count))
14769 HOST_WIDE_INT countval = INTVAL (count);
14772 if ((countval & 0x10) && max_size > 16)
14776 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14777 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14780 gcc_unreachable ();
14783 if ((countval & 0x08) && max_size > 8)
14786 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit target: an 8-byte piece is two SImode moves.  */
14789 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14790 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14794 if ((countval & 0x04) && max_size > 4)
14796 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14799 if ((countval & 0x02) && max_size > 2)
14801 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14804 if ((countval & 0x01) && max_size > 1)
14806 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large unknown residue: mask the count and fall back to a byte loop.  */
14813 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14814 count, 1, OPTAB_DIRECT);
14815 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14816 count, QImode, 1, 4);
14820 /* When there are stringops, we can cheaply increase dest and src pointers.
14821 Otherwise we save code size by maintaining offset (zero is readily
14822 available from preceding rep operation) and using x86 addressing modes.
14824 if (TARGET_SINGLE_STRINGOP)
/* Test each残 bit of COUNT and conditionally move 4/2/1 bytes using
   pointer-advancing strmov patterns.  */
14828 rtx label = ix86_expand_aligntest (count, 4, true);
14829 src = change_address (srcmem, SImode, srcptr);
14830 dest = change_address (destmem, SImode, destptr);
14831 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14832 emit_label (label);
14833 LABEL_NUSES (label) = 1;
14837 rtx label = ix86_expand_aligntest (count, 2, true);
14838 src = change_address (srcmem, HImode, srcptr);
14839 dest = change_address (destmem, HImode, destptr);
14840 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14841 emit_label (label);
14842 LABEL_NUSES (label) = 1;
14846 rtx label = ix86_expand_aligntest (count, 1, true);
14847 src = change_address (srcmem, QImode, srcptr);
14848 dest = change_address (destmem, QImode, destptr);
14849 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14850 emit_label (label);
14851 LABEL_NUSES (label) = 1;
/* No single-stringop: keep a running OFFSET register and address the
   tail pieces as ptr + offset.  */
14856 rtx offset = force_reg (Pmode, const0_rtx);
14861 rtx label = ix86_expand_aligntest (count, 4, true);
14862 src = change_address (srcmem, SImode, srcptr);
14863 dest = change_address (destmem, SImode, destptr);
14864 emit_move_insn (dest, src);
14865 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14866 true, OPTAB_LIB_WIDEN);
14868 emit_move_insn (offset, tmp);
14869 emit_label (label);
14870 LABEL_NUSES (label) = 1;
14874 rtx label = ix86_expand_aligntest (count, 2, true);
14875 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14876 src = change_address (srcmem, HImode, tmp);
14877 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14878 dest = change_address (destmem, HImode, tmp);
14879 emit_move_insn (dest, src);
14880 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14881 true, OPTAB_LIB_WIDEN);
14883 emit_move_insn (offset, tmp);
14884 emit_label (label);
14885 LABEL_NUSES (label) = 1;
14889 rtx label = ix86_expand_aligntest (count, 1, true);
14890 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14891 src = change_address (srcmem, QImode, tmp);
14892 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14893 dest = change_address (destmem, QImode, tmp);
14894 emit_move_insn (dest, src);
14895 emit_label (label);
14896 LABEL_NUSES (label) = 1;
14901 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14903 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14904 rtx count, int max_size)
/* Mask COUNT down to the residue and emit a byte-wide set loop.  */
14907 expand_simple_binop (counter_mode (count), AND, count,
14908 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14909 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14910 gen_lowpart (QImode, value), count, QImode,
14914 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14916 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Known count: straight-line stores for each set bit of the residual
   byte count, largest pieces first.  */
14920 if (CONST_INT_P (count))
14922 HOST_WIDE_INT countval = INTVAL (count);
14925 if ((countval & 0x10) && max_size > 16)
14929 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14930 emit_insn (gen_strset (destptr, dest, value));
14931 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14932 emit_insn (gen_strset (destptr, dest, value));
14935 gcc_unreachable ();
14938 if ((countval & 0x08) && max_size > 8)
14942 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14943 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit target: an 8-byte piece is two SImode stores.  */
14947 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14948 emit_insn (gen_strset (destptr, dest, value));
14949 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14950 emit_insn (gen_strset (destptr, dest, value));
14954 if ((countval & 0x04) && max_size > 4)
14956 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14957 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14960 if ((countval & 0x02) && max_size > 2)
14962 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14963 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14966 if ((countval & 0x01) && max_size > 1)
14968 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14969 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Residue too large for the bit-test ladder: use the loop helper.  */
14976 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Unknown count: test each residual bit of COUNT and conditionally
   store 16/8/4/2/1 bytes via pointer-advancing strset patterns.  */
14981 rtx label = ix86_expand_aligntest (count, 16, true);
14984 dest = change_address (destmem, DImode, destptr);
14985 emit_insn (gen_strset (destptr, dest, value));
14986 emit_insn (gen_strset (destptr, dest, value));
14990 dest = change_address (destmem, SImode, destptr);
14991 emit_insn (gen_strset (destptr, dest, value));
14992 emit_insn (gen_strset (destptr, dest, value));
14993 emit_insn (gen_strset (destptr, dest, value));
14994 emit_insn (gen_strset (destptr, dest, value));
14996 emit_label (label);
14997 LABEL_NUSES (label) = 1;
15001 rtx label = ix86_expand_aligntest (count, 8, true);
15004 dest = change_address (destmem, DImode, destptr);
15005 emit_insn (gen_strset (destptr, dest, value));
15009 dest = change_address (destmem, SImode, destptr);
15010 emit_insn (gen_strset (destptr, dest, value));
15011 emit_insn (gen_strset (destptr, dest, value));
15013 emit_label (label);
15014 LABEL_NUSES (label) = 1;
15018 rtx label = ix86_expand_aligntest (count, 4, true);
15019 dest = change_address (destmem, SImode, destptr);
15020 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15021 emit_label (label);
15022 LABEL_NUSES (label) = 1;
15026 rtx label = ix86_expand_aligntest (count, 2, true);
15027 dest = change_address (destmem, HImode, destptr);
15028 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15029 emit_label (label);
15030 LABEL_NUSES (label) = 1;
15034 rtx label = ix86_expand_aligntest (count, 1, true);
15035 dest = change_address (destmem, QImode, destptr);
15036 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15037 emit_label (label);
15038 LABEL_NUSES (label) = 1;
15042 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
15043 to DESIRED_ALIGNMENT. */
15045 expand_movmem_prologue (rtx destmem, rtx srcmem,
15046 rtx destptr, rtx srcptr, rtx count,
15047 int align, int desired_alignment)
/* For each alignment level below the goal, conditionally copy one
   QI/HI/SI piece (skipped when DEST is already aligned) and shrink
   COUNT accordingly.  */
15049 if (align <= 1 && desired_alignment > 1)
15051 rtx label = ix86_expand_aligntest (destptr, 1, false);
15052 srcmem = change_address (srcmem, QImode, srcptr);
15053 destmem = change_address (destmem, QImode, destptr);
15054 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15055 ix86_adjust_counter (count, 1);
15056 emit_label (label);
15057 LABEL_NUSES (label) = 1;
15059 if (align <= 2 && desired_alignment > 2)
15061 rtx label = ix86_expand_aligntest (destptr, 2, false);
15062 srcmem = change_address (srcmem, HImode, srcptr);
15063 destmem = change_address (destmem, HImode, destptr);
15064 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15065 ix86_adjust_counter (count, 2);
15066 emit_label (label);
15067 LABEL_NUSES (label) = 1;
15069 if (align <= 4 && desired_alignment > 4)
15071 rtx label = ix86_expand_aligntest (destptr, 4, false);
15072 srcmem = change_address (srcmem, SImode, srcptr);
15073 destmem = change_address (destmem, SImode, destptr);
15074 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15075 ix86_adjust_counter (count, 4);
15076 emit_label (label);
15077 LABEL_NUSES (label) = 1;
/* Alignments beyond 8 are not handled by this ladder.  */
15079 gcc_assert (desired_alignment <= 8);
15082 /* Set enough of DEST to align DEST, known to be aligned by ALIGN,
15083 to DESIRED_ALIGNMENT. */
15085 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15086 int align, int desired_alignment)
/* Mirror of expand_movmem_prologue for memset: conditionally store one
   QI/HI/SI piece per alignment level and shrink COUNT.  */
15088 if (align <= 1 && desired_alignment > 1)
15090 rtx label = ix86_expand_aligntest (destptr, 1, false);
15091 destmem = change_address (destmem, QImode, destptr);
15092 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15093 ix86_adjust_counter (count, 1);
15094 emit_label (label);
15095 LABEL_NUSES (label) = 1;
15097 if (align <= 2 && desired_alignment > 2)
15099 rtx label = ix86_expand_aligntest (destptr, 2, false);
15100 destmem = change_address (destmem, HImode, destptr);
15101 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15102 ix86_adjust_counter (count, 2);
15103 emit_label (label);
15104 LABEL_NUSES (label) = 1;
15106 if (align <= 4 && desired_alignment > 4)
15108 rtx label = ix86_expand_aligntest (destptr, 4, false);
15109 destmem = change_address (destmem, SImode, destptr);
15110 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15111 ix86_adjust_counter (count, 4);
15112 emit_label (label);
15113 LABEL_NUSES (label) = 1;
/* Alignments beyond 8 are not handled by this ladder.  */
15115 gcc_assert (desired_alignment <= 8);
15118 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* NOTE(review): lossy listing -- braces, `break;` statements, the
   `if (memset) ... else ...` selector around the two `algs =` assignments
   (original lines 15138/15140) and some declarations are missing.
   Contract (from the surviving lines): returns the stringop algorithm to
   use for a block of COUNT bytes (0 = unknown) with profile-estimated
   EXPECTED_SIZE (-1 = unknown); MEMSET selects the memset vs. memcpy cost
   table; *DYNAMIC_CHECK is set to a threshold size when a runtime
   inline-vs-libcall check should be emitted, -1 otherwise.  */
15119 static enum stringop_alg
15120 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15121 int *dynamic_check)
15123 const struct stringop_algs * algs;
15124 /* Algorithms using the rep prefix want at least edi and ecx;
15125 additionally, memset wants eax and memcpy wants esi. Don't
15126 consider such algorithms if the user has appropriated those
15127 registers for their own purposes. */
15128 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15130 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15132 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15133 || (alg != rep_prefix_1_byte \
15134 && alg != rep_prefix_4_byte \
15135 && alg != rep_prefix_8_byte))
15137 *dynamic_check = -1;
15139 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15141 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
/* A -mstringop-strategy override wins if it is usable here.  */
15142 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15143 return stringop_alg;
15144 /* rep; movq or rep; movl is the smallest variant. */
15145 else if (optimize_size)
15147 if (!count || (count & 3))
15148 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15150 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15152 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15154 else if (expected_size != -1 && expected_size < 4)
15155 return loop_1_byte;
15156 else if (expected_size != -1)
15159 enum stringop_alg alg = libcall;
/* NOTE(review): "NAX_STRINGOP_ALGS" looks like a corruption of
   MAX_STRINGOP_ALGS (the table-size macro in i386.h) -- confirm.  */
15160 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15162 /* We get here if the algorithms that were not libcall-based
15163 were rep-prefix based and we are unable to use rep prefixes
15164 based on global register usage. Break out of the loop and
15165 use the heuristic below. */
15166 if (algs->size[i].max == 0)
15168 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15170 enum stringop_alg candidate = algs->size[i].alg;
15172 if (candidate != libcall && ALG_USABLE_P (candidate))
15174 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15175 last non-libcall inline algorithm. */
15176 if (TARGET_INLINE_ALL_STRINGOPS)
15178 /* When the current size is best to be copied by a libcall,
15179 but we are still forced to inline, run the heuristic below
15180 that will pick code for medium sized blocks. */
15181 if (alg != libcall)
15185 else if (ALG_USABLE_P (candidate))
15189 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15191 /* When asked to inline the call anyway, try to pick meaningful choice.
15192 We look for maximal size of block that is faster to copy by hand and
15193 take blocks of at most of that size guessing that average size will
15194 be roughly half of the block.
15196 If this turns out to be bad, we might simply specify the preferred
15197 choice in ix86_costs. */
15198 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15199 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15202 enum stringop_alg alg;
15204 bool any_alg_usable_p = true;
/* NOTE(review): same suspected NAX_STRINGOP_ALGS / MAX_STRINGOP_ALGS
   typo as above -- confirm against i386.h.  */
15206 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15208 enum stringop_alg candidate = algs->size[i].alg;
15209 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15211 if (candidate != libcall && candidate
15212 && ALG_USABLE_P (candidate))
15213 max = algs->size[i].max;
15215 /* If there aren't any usable algorithms, then recursing on
15216 smaller sizes isn't going to find anything. Just return the
15217 simple byte-at-a-time copy loop. */
15218 if (!any_alg_usable_p)
15220 /* Pick something reasonable. */
15221 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15222 *dynamic_check = 128;
15223 return loop_1_byte;
/* Recurse with half the maximal hand-copied size as the expected size;
   the recursion must pick a concrete (non-libcall) algorithm.  */
15227 alg = decide_alg (count, max / 2, memset, dynamic_check);
15228 gcc_assert (*dynamic_check == -1);
15229 gcc_assert (alg != libcall);
15230 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15231 *dynamic_check = max;
15234 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15235 #undef ALG_USABLE_P
15238 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15239 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* NOTE(review): lossy listing -- the return-type line, the EXPECTED_SIZE
   parameter line, the `switch (alg)` header, several case labels and the
   desired_align assignments inside the rep_prefix cases are missing.  What
   survives shows: pick a desired alignment per algorithm (word-size for the
   loops, with a PentiumPro 8-byte special case for rep-prefix variants),
   never below the known ALIGN, and don't bother aligning tiny blocks.  */
15241 decide_alignment (int align,
15242 enum stringop_alg alg,
15245 int desired_align = 0;
15249 gcc_unreachable ();
15251 case unrolled_loop:
15252 desired_align = GET_MODE_SIZE (Pmode);
15254 case rep_prefix_8_byte:
15257 case rep_prefix_4_byte:
15258 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15259 copying whole cacheline at once. */
15260 if (TARGET_PENTIUMPRO)
15265 case rep_prefix_1_byte:
15266 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15267 copying whole cacheline at once. */
15268 if (TARGET_PENTIUMPRO)
/* Never request less alignment than we already have.  */
15282 if (desired_align < align)
15283 desired_align = align;
/* Aligning tiny blocks costs more than it saves.  */
15284 if (expected_size != -1 && expected_size < 4)
15285 desired_align = align;
15286 return desired_align;
15289 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the entire body of this helper (original lines
   15292-15297) is missing from this listing; only the header comment and
   signature survive.  Recover the body from upstream i386.c.  */
15291 smallest_pow2_greater_than (int val)
15299 /* Expand string move (memcpy) operation. Use i386 string operations when
15300 profitable. expand_setmem contains similar code. The code depends upon
15301 architecture, block size and alignment, but always has the same
15304 1) Prologue guard: Conditional that jumps up to epilogues for small
15305 blocks that can be handled by epilogue alone. This is faster but
15306 also needed for correctness, since prologue assume the block is larger
15307 than the desired alignment.
15309 Optional dynamic check for size and libcall for large
15310 blocks is emitted here too, with -minline-stringops-dynamically.
15312 2) Prologue: copy first few bytes in order to get destination aligned
15313 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15314 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15315 We emit either a jump tree on power of two sized blocks, or a byte loop.
15317 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15318 with specified algorithm.
15320 4) Epilogue: code copying tail of the block that is too small to be
15321 handled by main body (or up to size guarded by prologue guard). */
/* NOTE(review): lossy listing -- the return-type line, local declarations
   (destreg, srcreg, label, dynamic_check, tmp, i), braces, `break;`
   statements, early `return false/true` lines and the switch headers are
   missing between the surviving numbered lines.  */
15324 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15325 rtx expected_align_exp, rtx expected_size_exp)
15331 rtx jump_around_label = NULL;
15332 HOST_WIDE_INT align = 1;
15333 unsigned HOST_WIDE_INT count = 0;
15334 HOST_WIDE_INT expected_size = -1;
15335 int size_needed = 0, epilogue_size_needed;
15336 int desired_align = 0;
15337 enum stringop_alg alg;
15340 if (CONST_INT_P (align_exp))
15341 align = INTVAL (align_exp);
15342 /* i386 can do misaligned access on reasonably increased cost. */
15343 if (CONST_INT_P (expected_align_exp)
15344 && INTVAL (expected_align_exp) > align)
15345 align = INTVAL (expected_align_exp);
15346 if (CONST_INT_P (count_exp))
15347 count = expected_size = INTVAL (count_exp);
15348 if (CONST_INT_P (expected_size_exp) && count == 0)
15349 expected_size = INTVAL (expected_size_exp);
15351 /* Make sure we don't need to care about overflow later on. */
15352 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15355 /* Step 0: Decide on preferred algorithm, desired alignment and
15356 size of chunks to be copied by main loop. */
15358 alg = decide_alg (count, expected_size, false, &dynamic_check);
15359 desired_align = decide_alignment (align, alg, expected_size);
15361 if (!TARGET_ALIGN_STRINGOPS)
15362 align = desired_align;
15364 if (alg == libcall)
15366 gcc_assert (alg != no_stringop);
15368 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15369 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15370 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size copied by one main-loop iteration.  */
15375 gcc_unreachable ();
15377 size_needed = GET_MODE_SIZE (Pmode);
15379 case unrolled_loop:
15380 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15382 case rep_prefix_8_byte:
15385 case rep_prefix_4_byte:
15388 case rep_prefix_1_byte:
15394 epilogue_size_needed = size_needed;
15396 /* Step 1: Prologue guard. */
15398 /* Alignment code needs count to be in register. */
15399 if (CONST_INT_P (count_exp) && desired_align > align)
15400 count_exp = force_reg (counter_mode (count_exp), count_exp);
15401 gcc_assert (desired_align >= 1 && align >= 1);
15403 /* Ensure that alignment prologue won't copy past end of block. */
15404 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15406 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15407 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15408 Make sure it is power of 2. */
15409 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15411 if (CONST_INT_P (count_exp))
15413 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* Runtime count: branch to the epilogue for blocks smaller than
   EPILOGUE_SIZE_NEEDED, with a branch-probability hint.  */
15418 label = gen_label_rtx ();
15419 emit_cmp_and_jump_insns (count_exp,
15420 GEN_INT (epilogue_size_needed),
15421 LTU, 0, counter_mode (count_exp), 1, label);
15422 if (expected_size == -1 || expected_size < epilogue_size_needed)
15423 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15425 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15429 /* Emit code to decide on runtime whether library call or inline should be
15431 if (dynamic_check != -1)
15433 if (CONST_INT_P (count_exp))
15435 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15437 emit_block_move_via_libcall (dst, src, count_exp, false);
15438 count_exp = const0_rtx;
15444 rtx hot_label = gen_label_rtx ();
15445 jump_around_label = gen_label_rtx ();
15446 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15447 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15448 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15449 emit_block_move_via_libcall (dst, src, count_exp, false);
15450 emit_jump (jump_around_label);
15451 emit_label (hot_label);
15455 /* Step 2: Alignment prologue. */
15457 if (desired_align > align)
15459 /* Except for the first move in epilogue, we no longer know
15460 constant offset in aliasing info. It doesn't seem worth
15461 the pain to maintain it for the first move, so throw away
15463 src = change_address (src, BLKmode, srcreg);
15464 dst = change_address (dst, BLKmode, destreg);
15465 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15468 if (label && size_needed == 1)
15470 emit_label (label);
15471 LABEL_NUSES (label) = 1;
15475 /* Step 3: Main loop. */
15481 gcc_unreachable ();
15483 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15484 count_exp, QImode, 1, expected_size);
15487 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15488 count_exp, Pmode, 1, expected_size);
15490 case unrolled_loop:
15491 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15492 registers for 4 temporaries anyway. */
15493 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15494 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15497 case rep_prefix_8_byte:
15498 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15501 case rep_prefix_4_byte:
15502 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15505 case rep_prefix_1_byte:
15506 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15510 /* Adjust properly the offset of src and dest memory for aliasing. */
15511 if (CONST_INT_P (count_exp))
15513 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15514 (count / size_needed) * size_needed);
15515 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15516 (count / size_needed) * size_needed);
15520 src = change_address (src, BLKmode, srcreg);
15521 dst = change_address (dst, BLKmode, destreg);
15524 /* Step 4: Epilogue to copy the remaining bytes. */
15528 /* When the main loop is done, COUNT_EXP might hold original count,
15529 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15530 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15531 bytes. Compensate if needed. */
15533 if (size_needed < epilogue_size_needed)
15536 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15537 GEN_INT (size_needed - 1), count_exp, 1,
15539 if (tmp != count_exp)
15540 emit_move_insn (count_exp, tmp);
15542 emit_label (label);
15543 LABEL_NUSES (label) = 1;
15546 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15547 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15548 epilogue_size_needed);
15549 if (jump_around_label)
15550 emit_label (jump_around_label);
15554 /* Helper function for memcpy. For QImode value 0xXY produce
15555 0xXYXYXYXY of width specified by MODE. This is essentially
15556 a * 0x10101010, but we can do slightly better than
15557 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): despite "memcpy" above, the visible callers in this file
   are the setmem (memset) expanders -- confirm.  Lossy listing: braces,
   some shift/or lines in the CONST_INT path (original 15572-15574) and the
   trailing return are missing.  */
15560 promote_duplicated_reg (enum machine_mode mode, rtx val)
15562 enum machine_mode valmode = GET_MODE (val);
15564 int nops = mode == DImode ? 3 : 2;
15566 gcc_assert (mode == SImode || mode == DImode);
15567 if (val == const0_rtx)
15568 return copy_to_mode_reg (mode, const0_rtx);
15569 if (CONST_INT_P (val))
/* Constant byte: replicate it at compile time.  */
15571 HOST_WIDE_INT v = INTVAL (val) & 255;
15575 if (mode == DImode)
15576 v |= (v << 16) << 16;
15577 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15580 if (valmode == VOIDmode)
15582 if (valmode != QImode)
15583 val = gen_lowpart (QImode, val);
15584 if (mode == QImode)
15586 if (!TARGET_PARTIAL_REG_STALL)
/* Compare the cost of a multiply by the 0x0101... constant against
   the shift/or sequence and pick the cheaper expansion.  */
15588 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15589 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15590 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15591 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15593 rtx reg = convert_modes (mode, QImode, val, true);
15594 tmp = promote_duplicated_reg (mode, const1_rtx);
15595 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Shift/or fallback: double the replicated width step by step.  */
15600 rtx reg = convert_modes (mode, QImode, val, true);
15602 if (!TARGET_PARTIAL_REG_STALL)
15603 if (mode == SImode)
15604 emit_insn (gen_movsi_insv_1 (reg, reg));
15606 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15609 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15610 NULL, 1, OPTAB_DIRECT);
15612 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15614 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15615 NULL, 1, OPTAB_DIRECT);
15616 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* NOTE(review): the shift-by-32 step only makes sense for DImode; the
   surviving `if (mode == SImode)` guard here looks inverted or is missing
   its original surrounding context -- confirm against upstream i386.c.  */
15617 if (mode == SImode)
15619 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15620 NULL, 1, OPTAB_DIRECT);
15621 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15626 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15627 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15628 alignment from ALIGN to DESIRED_ALIGN. */
/* NOTE(review): lossy listing -- return-type line, braces and the
   TARGET_64BIT condition opening the first `if` (original line ~15634)
   are missing.  Picks the widest mode (DI/SI/HI) that either the main
   loop's chunk size or the alignment prologue will store in.  */
15630 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15635 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15636 promoted_val = promote_duplicated_reg (DImode, val);
15637 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15638 promoted_val = promote_duplicated_reg (SImode, val);
15639 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15640 promoted_val = promote_duplicated_reg (HImode, val);
15642 promoted_val = val;
15644 return promoted_val;
15647 /* Expand string clear operation (bzero). Use i386 string operations when
15648 profitable. See expand_movmem comment for explanation of individual
15649 steps performed. */
/* NOTE(review): lossy listing -- return-type line, local declarations
   (destreg, label, dynamic_check, tmp), braces, `break;` lines, switch
   headers and several case-body assignments are missing between the
   surviving numbered lines.  Mirrors ix86_expand_movmem, plus promotion
   of the fill byte VAL_EXP into a replicated register.  */
15651 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15652 rtx expected_align_exp, rtx expected_size_exp)
15657 rtx jump_around_label = NULL;
15658 HOST_WIDE_INT align = 1;
15659 unsigned HOST_WIDE_INT count = 0;
15660 HOST_WIDE_INT expected_size = -1;
15661 int size_needed = 0, epilogue_size_needed;
15662 int desired_align = 0;
15663 enum stringop_alg alg;
15664 rtx promoted_val = NULL;
15665 bool force_loopy_epilogue = false;
15668 if (CONST_INT_P (align_exp))
15669 align = INTVAL (align_exp);
15670 /* i386 can do misaligned access on reasonably increased cost. */
15671 if (CONST_INT_P (expected_align_exp)
15672 && INTVAL (expected_align_exp) > align)
15673 align = INTVAL (expected_align_exp);
15674 if (CONST_INT_P (count_exp))
15675 count = expected_size = INTVAL (count_exp);
15676 if (CONST_INT_P (expected_size_exp) && count == 0)
15677 expected_size = INTVAL (expected_size_exp);
15679 /* Make sure we don't need to care about overflow later on. */
15680 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15683 /* Step 0: Decide on preferred algorithm, desired alignment and
15684 size of chunks to be copied by main loop. */
15686 alg = decide_alg (count, expected_size, true, &dynamic_check);
15687 desired_align = decide_alignment (align, alg, expected_size);
15689 if (!TARGET_ALIGN_STRINGOPS)
15690 align = desired_align;
15692 if (alg == libcall)
15694 gcc_assert (alg != no_stringop);
15696 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15697 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* Per-algorithm chunk size stored by one main-loop iteration.  */
15702 gcc_unreachable ();
15704 size_needed = GET_MODE_SIZE (Pmode);
15706 case unrolled_loop:
15707 size_needed = GET_MODE_SIZE (Pmode) * 4;
15709 case rep_prefix_8_byte:
15712 case rep_prefix_4_byte:
15715 case rep_prefix_1_byte:
15720 epilogue_size_needed = size_needed;
15722 /* Step 1: Prologue guard. */
15724 /* Alignment code needs count to be in register. */
15725 if (CONST_INT_P (count_exp) && desired_align > align)
/* Use DImode only when the constant count doesn't fit 32 bits.  */
15727 enum machine_mode mode = SImode;
15728 if (TARGET_64BIT && (count & ~0xffffffff))
15730 count_exp = force_reg (mode, count_exp);
15732 /* Do the cheap promotion to allow better CSE across the
15733 main loop and epilogue (ie one load of the big constant in the
15734 front of all code. */
15735 if (CONST_INT_P (val_exp))
15736 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15737 desired_align, align);
15738 /* Ensure that alignment prologue won't copy past end of block. */
15739 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15741 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15742 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15743 Make sure it is power of 2. */
15744 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15746 /* To improve performance of small blocks, we jump around the VAL
15747 promoting mode. This mean that if the promoted VAL is not constant,
15748 we might not use it in the epilogue and have to use byte
15750 if (epilogue_size_needed > 2 && !promoted_val)
15751 force_loopy_epilogue = true;
15752 label = gen_label_rtx ();
15753 emit_cmp_and_jump_insns (count_exp,
15754 GEN_INT (epilogue_size_needed),
15755 LTU, 0, counter_mode (count_exp), 1, label);
15756 if (GET_CODE (count_exp) == CONST_INT)
15758 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15759 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15761 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime inline-vs-libcall size check (-minline-stringops-dynamically).  */
15763 if (dynamic_check != -1)
15765 rtx hot_label = gen_label_rtx ();
15766 jump_around_label = gen_label_rtx ();
15767 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15768 LEU, 0, counter_mode (count_exp), 1, hot_label);
15769 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15770 set_storage_via_libcall (dst, count_exp, val_exp, false);
15771 emit_jump (jump_around_label);
15772 emit_label (hot_label);
15775 /* Step 2: Alignment prologue. */
15777 /* Do the expensive promotion once we branched off the small blocks. */
15779 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15780 desired_align, align);
15781 gcc_assert (desired_align >= 1 && align >= 1);
15783 if (desired_align > align)
15785 /* Except for the first move in epilogue, we no longer know
15786 constant offset in aliasing info. It doesn't seem worth
15787 the pain to maintain it for the first move, so throw away
15789 dst = change_address (dst, BLKmode, destreg);
15790 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15793 if (label && size_needed == 1)
15795 emit_label (label);
15796 LABEL_NUSES (label) = 1;
15800 /* Step 3: Main loop. */
15806 gcc_unreachable ();
15808 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15809 count_exp, QImode, 1, expected_size);
15812 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15813 count_exp, Pmode, 1, expected_size);
15815 case unrolled_loop:
15816 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15817 count_exp, Pmode, 4, expected_size);
15819 case rep_prefix_8_byte:
15820 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15823 case rep_prefix_4_byte:
15824 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15827 case rep_prefix_1_byte:
15828 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15832 /* Adjust properly the offset of src and dest memory for aliasing. */
15833 if (CONST_INT_P (count_exp))
15834 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15835 (count / size_needed) * size_needed);
15837 dst = change_address (dst, BLKmode, destreg);
15839 /* Step 4: Epilogue to copy the remaining bytes. */
15843 /* When the main loop is done, COUNT_EXP might hold original count,
15844 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15845 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15846 bytes. Compensate if needed. */
/* NOTE(review): ix86_expand_movmem tests `size_needed < epilogue_size_needed`
   at the analogous spot; this `desired_align - align` comparison and the
   size_needed reassignment below differ -- confirm against upstream.  */
15848 if (size_needed < desired_align - align)
15851 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15852 GEN_INT (size_needed - 1), count_exp, 1,
15854 size_needed = desired_align - align + 1;
15855 if (tmp != count_exp)
15856 emit_move_insn (count_exp, tmp);
15858 emit_label (label);
15859 LABEL_NUSES (label) = 1;
15861 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* Non-constant VAL that was never promoted forces a byte loop.  */
15863 if (force_loopy_epilogue)
15864 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15867 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15870 if (jump_around_label)
15871 emit_label (jump_around_label);
15875 /* Expand the appropriate insns for doing strlen if not just doing
15878 out = result, initialized with the start address
15879 align_rtx = alignment of the address.
15880 scratch = scratch register, initialized with the startaddress when
15881 not aligned, otherwise undefined
15883 This is just the body. It needs the initializations mentioned above and
15884 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): lossy listing -- the `static void` line, braces, the
   TARGET_64BIT conditionals wrapping the paired gen_adddi3/gen_addsi3
   calls, and several other lines are missing.  Returns (via OUT) the
   address of the terminating zero byte, memchr-style.  */
15887 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15891 rtx align_2_label = NULL_RTX;
15892 rtx align_3_label = NULL_RTX;
15893 rtx align_4_label = gen_label_rtx ();
15894 rtx end_0_label = gen_label_rtx ();
15896 rtx tmpreg = gen_reg_rtx (SImode);
15897 rtx scratch = gen_reg_rtx (SImode);
15901 if (CONST_INT_P (align_rtx))
15902 align = INTVAL (align_rtx);
15904 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15906 /* Is there a known alignment and is it less than 4? */
15909 rtx scratch1 = gen_reg_rtx (Pmode);
15910 emit_move_insn (scratch1, out);
15911 /* Is there a known alignment and is it not 2? */
15914 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15915 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15917 /* Leave just the 3 lower bits. */
15918 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15919 NULL_RTX, 0, OPTAB_WIDEN);
15921 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15922 Pmode, 1, align_4_label);
15923 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15924 Pmode, 1, align_2_label);
15925 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15926 Pmode, 1, align_3_label);
15930 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15931 check if is aligned to 4 - byte. */
15933 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15934 NULL_RTX, 0, OPTAB_WIDEN);
15936 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15937 Pmode, 1, align_4_label);
15940 mem = change_address (src, QImode, out);
15942 /* Now compare the bytes. */
15944 /* Compare the first n unaligned byte on a byte per byte basis. */
15945 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15946 QImode, 1, end_0_label);
15948 /* Increment the address. */
15950 emit_insn (gen_adddi3 (out, out, const1_rtx));
15952 emit_insn (gen_addsi3 (out, out, const1_rtx));
15954 /* Not needed with an alignment of 2 */
15957 emit_label (align_2_label);
15959 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15963 emit_insn (gen_adddi3 (out, out, const1_rtx));
15965 emit_insn (gen_addsi3 (out, out, const1_rtx));
15967 emit_label (align_3_label);
15970 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15974 emit_insn (gen_adddi3 (out, out, const1_rtx));
15976 emit_insn (gen_addsi3 (out, out, const1_rtx));
15979 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15980 align this loop. It gives only huge programs, but does not help to
15982 emit_label (align_4_label);
15984 mem = change_address (src, SImode, out);
15985 emit_move_insn (scratch, mem);
15987 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15989 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15991 /* This formula yields a nonzero result iff one of the bytes is zero.
15992 This saves three branches inside loop and many cycles. */
15994 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15995 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15996 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15997 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15998 gen_int_mode (0x80808080, SImode)));
15999 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Zero byte found; locate which of the four bytes it is.  */
16004 rtx reg = gen_reg_rtx (SImode);
16005 rtx reg2 = gen_reg_rtx (Pmode);
16006 emit_move_insn (reg, tmpreg);
16007 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16009 /* If zero is not in the first two bytes, move two bytes forward. */
16010 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16011 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16012 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16013 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16014 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16017 /* Emit lea manually to avoid clobbering of flags. */
16018 emit_insn (gen_rtx_SET (SImode, reg2,
16019 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16021 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16022 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16023 emit_insn (gen_rtx_SET (VOIDmode, out,
16024 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Alternate (branchy) variant of the byte-location code.  */
16031 rtx end_2_label = gen_label_rtx ();
16032 /* Is zero in the first two bytes? */
16034 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16035 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16036 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16037 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16038 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16040 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16041 JUMP_LABEL (tmp) = end_2_label;
16043 /* Not in the first two. Move two bytes forward. */
16044 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16046 emit_insn (gen_adddi3 (out, out, const2_rtx));
16048 emit_insn (gen_addsi3 (out, out, const2_rtx));
16050 emit_label (end_2_label);
16054 /* Avoid branch in fixing the byte. */
16055 tmpreg = gen_lowpart (QImode, tmpreg);
16056 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16057 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16059 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16061 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16063 emit_label (end_0_label);
16066 /* Expand strlen. */
/* NOTE(review): lossy listing -- the return-type line, braces, `return 0/1`
   statements, the `rtx unspec;` declaration and the TARGET_64BIT selector
   around the paired di/si insns are missing.  Two strategies survive:
   the unrolled SImode scanner (ix86_expand_strlensi_unroll_1) and the
   repnz-scasb path via gen_strlenqi_1.  */
16069 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16071 rtx addr, scratch1, scratch2, scratch3, scratch4;
16073 /* The generic case of strlen expander is long. Avoid its
16074 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16076 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16077 && !TARGET_INLINE_ALL_STRINGOPS
16079 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16082 addr = force_reg (Pmode, XEXP (src, 0));
16083 scratch1 = gen_reg_rtx (Pmode);
16085 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16088 /* Well it seems that some optimizer does not combine a call like
16089 foo(strlen(bar), strlen(bar));
16090 when the move and the subtraction is done here. It does calculate
16091 the length just once when these instructions are done inside of
16092 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16093 often used and I use one fewer register for the lifetime of
16094 output_strlen_unroll() this is better. */
16096 emit_move_insn (out, addr);
16098 ix86_expand_strlensi_unroll_1 (out, src, align);
16100 /* strlensi_unroll_1 returns the address of the zero at the end of
16101 the string, like memchr(), so compute the length by subtracting
16102 the start address. */
16104 emit_insn (gen_subdi3 (out, out, addr));
16106 emit_insn (gen_subsi3 (out, out, addr));
16112 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16113 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16116 scratch2 = gen_reg_rtx (Pmode);
16117 scratch3 = gen_reg_rtx (Pmode);
16118 scratch4 = force_reg (Pmode, constm1_rtx);
16120 emit_move_insn (scratch3, addr);
16121 eoschar = force_reg (QImode, eoschar);
16123 src = replace_equiv_address_nv (src, scratch3);
16125 /* If .md starts supporting :P, this can be done in .md. */
16126 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16127 scratch4), UNSPEC_SCAS);
16128 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scratch1 holds -(len + 2); ~scratch1 - 1 == len.  */
16131 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16132 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16136 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16137 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16143 /* For given symbol (function) construct code to compute address of its PLT
16144 entry in large x86-64 PIC model. */
/* NOTE(review): lossy listing -- the `static rtx` line, braces and the
   trailing `return tmp;` are missing.  Computes symbol@PLTOFF plus the
   PIC register into a fresh pseudo.  */
16146 construct_plt_address (rtx symbol)
16148 rtx tmp = gen_reg_rtx (Pmode);
16149 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16151 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16152 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16154 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16155 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call to FNADDR (a MEM) returning RETVAL, passing CALLARG1 on
   the call rtx; CALLARG2 carries the AL vararg SSE-register count on
   64-bit; POP is the caller-pop amount; SIBCALL selects a tail call.
   NOTE(review): lossy listing -- the return-type line, braces, a `#if
   TARGET_MACHO` wrapper and some conditional context are missing between
   the surviving numbered lines.  */
16160 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16161 rtx callarg2 ATTRIBUTE_UNUSED,
16162 rtx pop, int sibcall)
16164 rtx use = NULL, call;
16166 if (pop == const0_rtx)
16168 gcc_assert (!TARGET_64BIT || !pop);
16170 if (TARGET_MACHO && !TARGET_64BIT)
16173 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16174 fnaddr = machopic_indirect_call_target (fnaddr);
16179 /* Static functions and indirect calls don't need the pic register. */
16180 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16181 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16182 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16183 use_reg (&use, pic_offset_table_rtx)
/* 64-bit varargs: AL carries the number of SSE registers used.  */;
16186 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16188 rtx al = gen_rtx_REG (QImode, AX_REG);
16189 emit_move_insn (al, callarg2);
16190 use_reg (&use, al);
16193 if (ix86_cmodel == CM_LARGE_PIC
16194 && GET_CODE (fnaddr) == MEM
16195 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16196 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16197 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16198 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16200 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16201 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls need the target in a call-clobbered reg (R11).  */
16203 if (sibcall && TARGET_64BIT
16204 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16207 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16208 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16209 emit_move_insn (fnaddr, addr);
16210 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16213 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16215 call = gen_rtx_SET (VOIDmode, retval, call);
16218 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16219 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16220 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16223 call = emit_call_insn (call);
16225 CALL_INSN_FUNCTION_USAGE (call) = use;
16229 /* Clear stack slot assignments remembered from previous functions.
16230 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* NOTE(review): lossy listing -- the opening brace and the trailing
   `return f;` / closing brace are missing.  Allocates and default-
   initializes the per-function machine_function record.  */
16233 static struct machine_function *
16234 ix86_init_machine_status (void)
16236 struct machine_function *f;
16238 f = GGC_CNEW (struct machine_function);
16239 f->use_fast_prologue_epilogue_nregs = -1;
16240 f->tls_descriptor_call_expanded_p = 0;
16245 /* Return a MEM corresponding to a stack slot with mode MODE.
16246 Allocate a new slot if necessary.
16248 The RTL for a function can have several slots available: N is
16249 which slot to use. */
16252 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16254 struct stack_local_entry *s;
16256 gcc_assert (n < MAX_386_STACK_LOCALS);
16258 /* Virtual slot is valid only before vregs are instantiated. */
16259 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously created slot for this (mode, n) pair if one is
   cached; copy_rtx gives each caller an independent MEM rtx.  */
16261 for (s = ix86_stack_locals; s; s = s->next)
16262 if (s->mode == mode && s->n == n)
16263 return copy_rtx (s->rtl);
/* No cached slot: allocate a fresh stack local and prepend the new
   cache entry to the per-function list.  */
16265 s = (struct stack_local_entry *)
16266 ggc_alloc (sizeof (struct stack_local_entry));
16269 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16271 s->next = ix86_stack_locals;
16272 ix86_stack_locals = s;
16276 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* GC root for the lazily built symbol, shared by all calls.  */
16278 static GTY(()) rtx ix86_tls_symbol;
16280 ix86_tls_get_addr (void)
/* Build the SYMBOL_REF on first use and cache it thereafter.  */
16283 if (!ix86_tls_symbol)
16285 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
/* Entry-point name depends on the TLS dialect in use.  */
16286 (TARGET_ANY_GNU_TLS
16288 ? "___tls_get_addr"
16289 : "__tls_get_addr");
16292 return ix86_tls_symbol;
16295 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
/* GC root for the lazily built symbol, shared by all calls.  */
16297 static GTY(()) rtx ix86_tls_module_base_symbol;
16299 ix86_tls_module_base (void)
/* Build the symbol once, tagging it with the global-dynamic TLS model
   so later passes handle the reference correctly.  */
16302 if (!ix86_tls_module_base_symbol)
16304 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16305 "_TLS_MODULE_BASE_");
16306 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16307 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16310 return ix86_tls_module_base_symbol;
16313 /* Calculate the length of the memory address in the instruction
16314 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16317 memory_address_length (rtx addr)
16319 struct ix86_address parts;
16320 rtx base, index, disp;
/* Auto-increment/decrement addresses are handled up front, before the
   general base/index/disp decomposition.  */
16324 if (GET_CODE (addr) == PRE_DEC
16325 || GET_CODE (addr) == POST_INC
16326 || GET_CODE (addr) == PRE_MODIFY
16327 || GET_CODE (addr) == POST_MODIFY)
16330 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register-identity comparisons below see the
   underlying hard/pseudo registers.  */
16333 if (parts.base && GET_CODE (parts.base) == SUBREG)
16334 parts.base = SUBREG_REG (parts.base);
16335 if (parts.index && GET_CODE (parts.index) == SUBREG)
16336 parts.index = SUBREG_REG (parts.index);
16339 index = parts.index;
16344 - esp as the base always wants an index,
16345 - ebp as the base always wants a displacement. */
16347 /* Register Indirect. */
16348 if (base && !index && !disp)
16350 /* esp (for its index) and ebp (for its displacement) need
16351 the two-byte modrm form. */
16352 if (addr == stack_pointer_rtx
16353 || addr == arg_pointer_rtx
16354 || addr == frame_pointer_rtx
16355 || addr == hard_frame_pointer_rtx)
16359 /* Direct Addressing. */
16360 else if (disp && !base && !index)
16365 /* Find the length of the displacement constant. */
/* Constraint K accepts constants that fit a signed 8-bit immediate,
   i.e. the short disp8 encoding.  */
16368 if (base && satisfies_constraint_K (disp))
16373 /* ebp always wants a displacement. */
16374 else if (base == hard_frame_pointer_rtx)
16377 /* An index requires the two-byte modrm form.... */
16379 /* ...like esp, which always wants an index. */
16380 || base == stack_pointer_rtx
16381 || base == arg_pointer_rtx
16382 || base == frame_pointer_rtx)
16389 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16390 is set, expect that insn have 8bit immediate alternative. */
16392 ix86_attr_length_immediate_default (rtx insn, int shortform)
16396 extract_insn_cached (insn);
/* Scan the operands for a constant; only constants contribute to the
   immediate length.  */
16397 for (i = recog_data.n_operands - 1; i >= 0; --i)
16398 if (CONSTANT_P (recog_data.operand[i]))
/* Constraint K = fits in a signed 8-bit immediate, so the short
   one-byte encoding applies.  */
16401 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
/* Otherwise the immediate length follows the insn's mode.  */
16405 switch (get_attr_mode (insn))
16416 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16421 fatal_insn ("unknown insn mode", insn);
16427 /* Compute default value for "length_address" attribute. */
16429 ix86_attr_length_address_default (rtx insn)
/* LEA carries its address in the SET_SRC rather than inside a MEM,
   so it is measured separately.  */
16433 if (get_attr_type (insn) == TYPE_LEA)
16435 rtx set = PATTERN (insn);
/* Some LEA patterns are PARALLELs; the address lives in element 0.  */
16437 if (GET_CODE (set) == PARALLEL)
16438 set = XVECEXP (set, 0, 0);
16440 gcc_assert (GET_CODE (set) == SET);
16442 return memory_address_length (SET_SRC (set));
/* Otherwise measure the address of the first MEM operand found.  */
16445 extract_insn_cached (insn);
16446 for (i = recog_data.n_operands - 1; i >= 0; --i)
16447 if (MEM_P (recog_data.operand[i]))
16449 return memory_address_length (XEXP (recog_data.operand[i], 0));
16455 /* Return the maximum number of instructions a cpu can issue. */
16458 ix86_issue_rate (void)
/* Issue width is selected per scheduling model (ix86_tune);
   processors sharing a width share a case group.  */
16462 case PROCESSOR_PENTIUM:
16466 case PROCESSOR_PENTIUMPRO:
16467 case PROCESSOR_PENTIUM4:
16468 case PROCESSOR_ATHLON:
16470 case PROCESSOR_AMDFAM10:
16471 case PROCESSOR_NOCONA:
16472 case PROCESSOR_GENERIC32:
16473 case PROCESSOR_GENERIC64:
16476 case PROCESSOR_CORE2:
16484 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16485 by DEP_INSN and nothing set by DEP_INSN. */
16488 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16492 /* Simplify the test for uninteresting insns. */
16493 if (insn_type != TYPE_SETCC
16494 && insn_type != TYPE_ICMOV
16495 && insn_type != TYPE_FCMOV
16496 && insn_type != TYPE_IBR)
16499 if ((set = single_set (dep_insn)) != 0)
16501 set = SET_DEST (set);
16504 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16505 && XVECLEN (PATTERN (dep_insn), 0) == 2
16506 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16507 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16509 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16510 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16515 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16518 /* This test is true if the dependent insn reads the flags but
16519 not any other potentially set register. */
16520 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16523 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16529 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16530 address with operands set by DEP_INSN. */
16533 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* For LEA the "address" is the SET_SRC of the pattern itself.  */
16537 if (insn_type == TYPE_LEA
16540 addr = PATTERN (insn);
/* Some LEA patterns are PARALLELs; the SET of interest is element 0.  */
16542 if (GET_CODE (addr) == PARALLEL)
16543 addr = XVECEXP (addr, 0, 0);
16545 gcc_assert (GET_CODE (addr) == SET);
16547 addr = SET_SRC (addr);
/* Otherwise scan the operands for a MEM and take its address.  */
16552 extract_insn_cached (insn);
16553 for (i = recog_data.n_operands - 1; i >= 0; --i)
16554 if (MEM_P (recog_data.operand[i]))
16556 addr = XEXP (recog_data.operand[i], 0);
/* True iff any register used in the address is written by DEP_INSN.  */
16563 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN (producer) and INSN (consumer) for the tuned processor.  */
16567 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16569 enum attr_type insn_type, dep_insn_type;
16570 enum attr_memory memory;
16572 int dep_insn_code_number;
16574 /* Anti and output dependencies have zero cost on all CPUs. */
16575 if (REG_NOTE_KIND (link) != 0)
16578 dep_insn_code_number = recog_memoized (dep_insn);
16580 /* If we can't recognize the insns, we can't really do anything. */
16581 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16584 insn_type = get_attr_type (insn);
16585 dep_insn_type = get_attr_type (dep_insn);
/* Per-processor adjustments follow, keyed on the tuning target.  */
16589 case PROCESSOR_PENTIUM:
16590 /* Address Generation Interlock adds a cycle of latency. */
16591 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16594 /* ??? Compares pair with jump/setcc. */
16595 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16598 /* Floating point stores require value to be ready one cycle earlier. */
16599 if (insn_type == TYPE_FMOV
16600 && get_attr_memory (insn) == MEMORY_STORE
16601 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16605 case PROCESSOR_PENTIUMPRO:
16606 memory = get_attr_memory (insn);
16608 /* INT->FP conversion is expensive. */
16609 if (get_attr_fp_int_src (dep_insn))
16612 /* There is one cycle extra latency between an FP op and a store. */
16613 if (insn_type == TYPE_FMOV
16614 && (set = single_set (dep_insn)) != NULL_RTX
16615 && (set2 = single_set (insn)) != NULL_RTX
16616 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16617 && MEM_P (SET_DEST (set2)))
16620 /* Show ability of reorder buffer to hide latency of load by executing
16621 in parallel with previous instruction in case
16622 previous instruction is not needed to compute the address. */
16623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16624 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16626 /* Claim moves to take one cycle, as core can issue one load
16627 at time and the next load can start cycle later. */
16628 if (dep_insn_type == TYPE_IMOV
16629 || dep_insn_type == TYPE_FMOV)
16637 memory = get_attr_memory (insn);
16639 /* The esp dependency is resolved before the instruction is really
16641 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16642 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16645 /* INT->FP conversion is expensive. */
16646 if (get_attr_fp_int_src (dep_insn))
16649 /* Show ability of reorder buffer to hide latency of load by executing
16650 in parallel with previous instruction in case
16651 previous instruction is not needed to compute the address. */
16652 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16653 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16655 /* Claim moves to take one cycle, as core can issue one load
16656 at time and the next load can start cycle later. */
16657 if (dep_insn_type == TYPE_IMOV
16658 || dep_insn_type == TYPE_FMOV)
16667 case PROCESSOR_ATHLON:
16669 case PROCESSOR_AMDFAM10:
16670 case PROCESSOR_GENERIC32:
16671 case PROCESSOR_GENERIC64:
16672 memory = get_attr_memory (insn);
16674 /* Show ability of reorder buffer to hide latency of load by executing
16675 in parallel with previous instruction in case
16676 previous instruction is not needed to compute the address. */
16677 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16678 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16680 enum attr_unit unit = get_attr_unit (insn);
16683 /* Because of the difference between the length of integer and
16684 floating unit pipeline preparation stages, the memory operands
16685 for floating point are cheaper.
16687 ??? For Athlon it the difference is most probably 2. */
16688 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16691 loadcost = TARGET_ATHLON ? 2 : 0;
16693 if (cost >= loadcost)
16706 /* How many alternative schedules to try. This should be as wide as the
16707 scheduling freedom in the DFA, but no wider. Making this value too
16708 large results extra work for the scheduler. */
16711 ia32_multipass_dfa_lookahead (void)
/* Lookahead depth is selected per tuning target.  */
16715 case PROCESSOR_PENTIUM:
16718 case PROCESSOR_PENTIUMPRO:
16728 /* Compute the alignment given to a constant that is being placed in memory.
16729 EXP is the constant and ALIGN is the alignment that the object would
16731 The value of this function is used instead of that alignment to align
16735 ix86_constant_alignment (tree exp, int align)
/* Floating constants: bump doubles to 64-bit and 128-bit-preferring
   modes to 128-bit alignment when the default is smaller.  */
16737 if (TREE_CODE (exp) == REAL_CST)
16739 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16741 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants get word alignment unless optimizing for size.  */
16744 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16745 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16746 return BITS_PER_WORD;
16751 /* Compute the alignment for a static variable.
16752 TYPE is the data type, and ALIGN is the alignment that
16753 the object would ordinarily have. The value of this function is used
16754 instead of that alignment to align the object. */
16757 ix86_data_alignment (tree type, int align)
/* Cap the boosted alignment: word-aligned when optimizing for size,
   otherwise at most 256 bits (limited by MAX_OFILE_ALIGNMENT).  */
16759 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates (size >= max_align bits, or with a non-zero high
   word of the size constant) are raised to max_align.  */
16761 if (AGGREGATE_TYPE_P (type)
16762 && TYPE_SIZE (type)
16763 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16764 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16765 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16766 && align < max_align)
16769 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16770 to 16byte boundary. */
16773 if (AGGREGATE_TYPE_P (type)
16774 && TYPE_SIZE (type)
16775 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16776 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16777 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* The remaining cases bump alignment based on the element/field mode:
   64-bit for DFmode data, 128-bit for modes preferring it.  */
16781 if (TREE_CODE (type) == ARRAY_TYPE)
16783 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16785 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16788 else if (TREE_CODE (type) == COMPLEX_TYPE)
16791 if (TYPE_MODE (type) == DCmode && align < 64)
16793 if (TYPE_MODE (type) == XCmode && align < 128)
16796 else if ((TREE_CODE (type) == RECORD_TYPE
16797 || TREE_CODE (type) == UNION_TYPE
16798 || TREE_CODE (type) == QUAL_UNION_TYPE)
16799 && TYPE_FIELDS (type))
16801 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16803 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16806 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16807 || TREE_CODE (type) == INTEGER_TYPE)
16809 if (TYPE_MODE (type) == DFmode && align < 64)
16811 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16818 /* Compute the alignment for a local variable.
16819 TYPE is the data type, and ALIGN is the alignment that
16820 the object would ordinarily have. The value of this macro is used
16821 instead of that alignment to align the object. */
16824 ix86_local_alignment (tree type, int align)
16826 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16827 to 16byte boundary. */
16830 if (AGGREGATE_TYPE_P (type)
16831 && TYPE_SIZE (type)
16832 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16833 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16834 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same mode-based bumps as ix86_data_alignment: 64-bit for DFmode
   data, 128-bit for modes preferring it.  */
16837 if (TREE_CODE (type) == ARRAY_TYPE)
16839 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16841 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16844 else if (TREE_CODE (type) == COMPLEX_TYPE)
16846 if (TYPE_MODE (type) == DCmode && align < 64)
16848 if (TYPE_MODE (type) == XCmode && align < 128)
16851 else if ((TREE_CODE (type) == RECORD_TYPE
16852 || TREE_CODE (type) == UNION_TYPE
16853 || TREE_CODE (type) == QUAL_UNION_TYPE)
16854 && TYPE_FIELDS (type))
16856 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16858 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16861 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16862 || TREE_CODE (type) == INTEGER_TYPE)
16865 if (TYPE_MODE (type) == DFmode && align < 64)
16867 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16873 /* Emit RTL insns to initialize the variable parts of a trampoline.
16874 FNADDR is an RTX for the address of the function's pure code.
16875 CXT is an RTX for the static chain value for the function. */
16877 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit trampoline: movl $cxt, %ecx (0xb9) then jmp rel32 (0xe9).  */
16881 /* Compute offset from the end of the jmp to the target function. */
16882 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
/* The jmp displacement is relative to the end of the 10-byte
   trampoline, hence the +10.  */
16883 plus_constant (tramp, 10),
16884 NULL_RTX, 1, OPTAB_DIRECT);
16885 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16886 gen_int_mode (0xb9, QImode));
16887 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16888 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16889 gen_int_mode (0xe9, QImode));
16890 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit trampoline: load FNADDR into r11, CXT into r10, jmp *%r11.  */
16895 /* Try to load address using shorter movl instead of movabs.
16896 We may want to support movq for kernel mode, but kernel does not use
16897 trampolines at the moment. */
16898 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16900 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* 0x41 0xbb = movl imm32, %r11d (zero-extends into r11).  */
16901 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16902 gen_int_mode (0xbb41, HImode));
16903 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16904 gen_lowpart (SImode, fnaddr));
/* 0x49 0xbb = movabs imm64, %r11.  */
16909 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16910 gen_int_mode (0xbb49, HImode));
16911 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16915 /* Load static chain using movabs to r10. */
16916 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16917 gen_int_mode (0xba49, HImode));
16918 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16921 /* Jump to the r11 */
/* 0x49 0xff 0xe3 = jmp *%r11.  */
16922 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16923 gen_int_mode (0xff49, HImode));
16924 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16925 gen_int_mode (0xe3, QImode));
16927 gcc_assert (offset <= TRAMPOLINE_SIZE);
16930 #ifdef ENABLE_EXECUTE_STACK
/* Some platforms need a runtime call to make the stack executable.  */
16931 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16932 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16936 /* Codes for all the SSE/MMX builtins. */
16939 IX86_BUILTIN_ADDPS,
16940 IX86_BUILTIN_ADDSS,
16941 IX86_BUILTIN_DIVPS,
16942 IX86_BUILTIN_DIVSS,
16943 IX86_BUILTIN_MULPS,
16944 IX86_BUILTIN_MULSS,
16945 IX86_BUILTIN_SUBPS,
16946 IX86_BUILTIN_SUBSS,
16948 IX86_BUILTIN_CMPEQPS,
16949 IX86_BUILTIN_CMPLTPS,
16950 IX86_BUILTIN_CMPLEPS,
16951 IX86_BUILTIN_CMPGTPS,
16952 IX86_BUILTIN_CMPGEPS,
16953 IX86_BUILTIN_CMPNEQPS,
16954 IX86_BUILTIN_CMPNLTPS,
16955 IX86_BUILTIN_CMPNLEPS,
16956 IX86_BUILTIN_CMPNGTPS,
16957 IX86_BUILTIN_CMPNGEPS,
16958 IX86_BUILTIN_CMPORDPS,
16959 IX86_BUILTIN_CMPUNORDPS,
16960 IX86_BUILTIN_CMPEQSS,
16961 IX86_BUILTIN_CMPLTSS,
16962 IX86_BUILTIN_CMPLESS,
16963 IX86_BUILTIN_CMPNEQSS,
16964 IX86_BUILTIN_CMPNLTSS,
16965 IX86_BUILTIN_CMPNLESS,
16966 IX86_BUILTIN_CMPNGTSS,
16967 IX86_BUILTIN_CMPNGESS,
16968 IX86_BUILTIN_CMPORDSS,
16969 IX86_BUILTIN_CMPUNORDSS,
16971 IX86_BUILTIN_COMIEQSS,
16972 IX86_BUILTIN_COMILTSS,
16973 IX86_BUILTIN_COMILESS,
16974 IX86_BUILTIN_COMIGTSS,
16975 IX86_BUILTIN_COMIGESS,
16976 IX86_BUILTIN_COMINEQSS,
16977 IX86_BUILTIN_UCOMIEQSS,
16978 IX86_BUILTIN_UCOMILTSS,
16979 IX86_BUILTIN_UCOMILESS,
16980 IX86_BUILTIN_UCOMIGTSS,
16981 IX86_BUILTIN_UCOMIGESS,
16982 IX86_BUILTIN_UCOMINEQSS,
16984 IX86_BUILTIN_CVTPI2PS,
16985 IX86_BUILTIN_CVTPS2PI,
16986 IX86_BUILTIN_CVTSI2SS,
16987 IX86_BUILTIN_CVTSI642SS,
16988 IX86_BUILTIN_CVTSS2SI,
16989 IX86_BUILTIN_CVTSS2SI64,
16990 IX86_BUILTIN_CVTTPS2PI,
16991 IX86_BUILTIN_CVTTSS2SI,
16992 IX86_BUILTIN_CVTTSS2SI64,
16994 IX86_BUILTIN_MAXPS,
16995 IX86_BUILTIN_MAXSS,
16996 IX86_BUILTIN_MINPS,
16997 IX86_BUILTIN_MINSS,
16999 IX86_BUILTIN_LOADUPS,
17000 IX86_BUILTIN_STOREUPS,
17001 IX86_BUILTIN_MOVSS,
17003 IX86_BUILTIN_MOVHLPS,
17004 IX86_BUILTIN_MOVLHPS,
17005 IX86_BUILTIN_LOADHPS,
17006 IX86_BUILTIN_LOADLPS,
17007 IX86_BUILTIN_STOREHPS,
17008 IX86_BUILTIN_STORELPS,
17010 IX86_BUILTIN_MASKMOVQ,
17011 IX86_BUILTIN_MOVMSKPS,
17012 IX86_BUILTIN_PMOVMSKB,
17014 IX86_BUILTIN_MOVNTPS,
17015 IX86_BUILTIN_MOVNTQ,
17017 IX86_BUILTIN_LOADDQU,
17018 IX86_BUILTIN_STOREDQU,
17020 IX86_BUILTIN_PACKSSWB,
17021 IX86_BUILTIN_PACKSSDW,
17022 IX86_BUILTIN_PACKUSWB,
17024 IX86_BUILTIN_PADDB,
17025 IX86_BUILTIN_PADDW,
17026 IX86_BUILTIN_PADDD,
17027 IX86_BUILTIN_PADDQ,
17028 IX86_BUILTIN_PADDSB,
17029 IX86_BUILTIN_PADDSW,
17030 IX86_BUILTIN_PADDUSB,
17031 IX86_BUILTIN_PADDUSW,
17032 IX86_BUILTIN_PSUBB,
17033 IX86_BUILTIN_PSUBW,
17034 IX86_BUILTIN_PSUBD,
17035 IX86_BUILTIN_PSUBQ,
17036 IX86_BUILTIN_PSUBSB,
17037 IX86_BUILTIN_PSUBSW,
17038 IX86_BUILTIN_PSUBUSB,
17039 IX86_BUILTIN_PSUBUSW,
17042 IX86_BUILTIN_PANDN,
17046 IX86_BUILTIN_PAVGB,
17047 IX86_BUILTIN_PAVGW,
17049 IX86_BUILTIN_PCMPEQB,
17050 IX86_BUILTIN_PCMPEQW,
17051 IX86_BUILTIN_PCMPEQD,
17052 IX86_BUILTIN_PCMPGTB,
17053 IX86_BUILTIN_PCMPGTW,
17054 IX86_BUILTIN_PCMPGTD,
17056 IX86_BUILTIN_PMADDWD,
17058 IX86_BUILTIN_PMAXSW,
17059 IX86_BUILTIN_PMAXUB,
17060 IX86_BUILTIN_PMINSW,
17061 IX86_BUILTIN_PMINUB,
17063 IX86_BUILTIN_PMULHUW,
17064 IX86_BUILTIN_PMULHW,
17065 IX86_BUILTIN_PMULLW,
17067 IX86_BUILTIN_PSADBW,
17068 IX86_BUILTIN_PSHUFW,
17070 IX86_BUILTIN_PSLLW,
17071 IX86_BUILTIN_PSLLD,
17072 IX86_BUILTIN_PSLLQ,
17073 IX86_BUILTIN_PSRAW,
17074 IX86_BUILTIN_PSRAD,
17075 IX86_BUILTIN_PSRLW,
17076 IX86_BUILTIN_PSRLD,
17077 IX86_BUILTIN_PSRLQ,
17078 IX86_BUILTIN_PSLLWI,
17079 IX86_BUILTIN_PSLLDI,
17080 IX86_BUILTIN_PSLLQI,
17081 IX86_BUILTIN_PSRAWI,
17082 IX86_BUILTIN_PSRADI,
17083 IX86_BUILTIN_PSRLWI,
17084 IX86_BUILTIN_PSRLDI,
17085 IX86_BUILTIN_PSRLQI,
17087 IX86_BUILTIN_PUNPCKHBW,
17088 IX86_BUILTIN_PUNPCKHWD,
17089 IX86_BUILTIN_PUNPCKHDQ,
17090 IX86_BUILTIN_PUNPCKLBW,
17091 IX86_BUILTIN_PUNPCKLWD,
17092 IX86_BUILTIN_PUNPCKLDQ,
17094 IX86_BUILTIN_SHUFPS,
17096 IX86_BUILTIN_RCPPS,
17097 IX86_BUILTIN_RCPSS,
17098 IX86_BUILTIN_RSQRTPS,
17099 IX86_BUILTIN_RSQRTPS_NR,
17100 IX86_BUILTIN_RSQRTSS,
17101 IX86_BUILTIN_RSQRTF,
17102 IX86_BUILTIN_SQRTPS,
17103 IX86_BUILTIN_SQRTPS_NR,
17104 IX86_BUILTIN_SQRTSS,
17106 IX86_BUILTIN_UNPCKHPS,
17107 IX86_BUILTIN_UNPCKLPS,
17109 IX86_BUILTIN_ANDPS,
17110 IX86_BUILTIN_ANDNPS,
17112 IX86_BUILTIN_XORPS,
17115 IX86_BUILTIN_LDMXCSR,
17116 IX86_BUILTIN_STMXCSR,
17117 IX86_BUILTIN_SFENCE,
17119 /* 3DNow! Original */
17120 IX86_BUILTIN_FEMMS,
17121 IX86_BUILTIN_PAVGUSB,
17122 IX86_BUILTIN_PF2ID,
17123 IX86_BUILTIN_PFACC,
17124 IX86_BUILTIN_PFADD,
17125 IX86_BUILTIN_PFCMPEQ,
17126 IX86_BUILTIN_PFCMPGE,
17127 IX86_BUILTIN_PFCMPGT,
17128 IX86_BUILTIN_PFMAX,
17129 IX86_BUILTIN_PFMIN,
17130 IX86_BUILTIN_PFMUL,
17131 IX86_BUILTIN_PFRCP,
17132 IX86_BUILTIN_PFRCPIT1,
17133 IX86_BUILTIN_PFRCPIT2,
17134 IX86_BUILTIN_PFRSQIT1,
17135 IX86_BUILTIN_PFRSQRT,
17136 IX86_BUILTIN_PFSUB,
17137 IX86_BUILTIN_PFSUBR,
17138 IX86_BUILTIN_PI2FD,
17139 IX86_BUILTIN_PMULHRW,
17141 /* 3DNow! Athlon Extensions */
17142 IX86_BUILTIN_PF2IW,
17143 IX86_BUILTIN_PFNACC,
17144 IX86_BUILTIN_PFPNACC,
17145 IX86_BUILTIN_PI2FW,
17146 IX86_BUILTIN_PSWAPDSI,
17147 IX86_BUILTIN_PSWAPDSF,
17150 IX86_BUILTIN_ADDPD,
17151 IX86_BUILTIN_ADDSD,
17152 IX86_BUILTIN_DIVPD,
17153 IX86_BUILTIN_DIVSD,
17154 IX86_BUILTIN_MULPD,
17155 IX86_BUILTIN_MULSD,
17156 IX86_BUILTIN_SUBPD,
17157 IX86_BUILTIN_SUBSD,
17159 IX86_BUILTIN_CMPEQPD,
17160 IX86_BUILTIN_CMPLTPD,
17161 IX86_BUILTIN_CMPLEPD,
17162 IX86_BUILTIN_CMPGTPD,
17163 IX86_BUILTIN_CMPGEPD,
17164 IX86_BUILTIN_CMPNEQPD,
17165 IX86_BUILTIN_CMPNLTPD,
17166 IX86_BUILTIN_CMPNLEPD,
17167 IX86_BUILTIN_CMPNGTPD,
17168 IX86_BUILTIN_CMPNGEPD,
17169 IX86_BUILTIN_CMPORDPD,
17170 IX86_BUILTIN_CMPUNORDPD,
17171 IX86_BUILTIN_CMPEQSD,
17172 IX86_BUILTIN_CMPLTSD,
17173 IX86_BUILTIN_CMPLESD,
17174 IX86_BUILTIN_CMPNEQSD,
17175 IX86_BUILTIN_CMPNLTSD,
17176 IX86_BUILTIN_CMPNLESD,
17177 IX86_BUILTIN_CMPORDSD,
17178 IX86_BUILTIN_CMPUNORDSD,
17180 IX86_BUILTIN_COMIEQSD,
17181 IX86_BUILTIN_COMILTSD,
17182 IX86_BUILTIN_COMILESD,
17183 IX86_BUILTIN_COMIGTSD,
17184 IX86_BUILTIN_COMIGESD,
17185 IX86_BUILTIN_COMINEQSD,
17186 IX86_BUILTIN_UCOMIEQSD,
17187 IX86_BUILTIN_UCOMILTSD,
17188 IX86_BUILTIN_UCOMILESD,
17189 IX86_BUILTIN_UCOMIGTSD,
17190 IX86_BUILTIN_UCOMIGESD,
17191 IX86_BUILTIN_UCOMINEQSD,
17193 IX86_BUILTIN_MAXPD,
17194 IX86_BUILTIN_MAXSD,
17195 IX86_BUILTIN_MINPD,
17196 IX86_BUILTIN_MINSD,
17198 IX86_BUILTIN_ANDPD,
17199 IX86_BUILTIN_ANDNPD,
17201 IX86_BUILTIN_XORPD,
17203 IX86_BUILTIN_SQRTPD,
17204 IX86_BUILTIN_SQRTSD,
17206 IX86_BUILTIN_UNPCKHPD,
17207 IX86_BUILTIN_UNPCKLPD,
17209 IX86_BUILTIN_SHUFPD,
17211 IX86_BUILTIN_LOADUPD,
17212 IX86_BUILTIN_STOREUPD,
17213 IX86_BUILTIN_MOVSD,
17215 IX86_BUILTIN_LOADHPD,
17216 IX86_BUILTIN_LOADLPD,
17218 IX86_BUILTIN_CVTDQ2PD,
17219 IX86_BUILTIN_CVTDQ2PS,
17221 IX86_BUILTIN_CVTPD2DQ,
17222 IX86_BUILTIN_CVTPD2PI,
17223 IX86_BUILTIN_CVTPD2PS,
17224 IX86_BUILTIN_CVTTPD2DQ,
17225 IX86_BUILTIN_CVTTPD2PI,
17227 IX86_BUILTIN_CVTPI2PD,
17228 IX86_BUILTIN_CVTSI2SD,
17229 IX86_BUILTIN_CVTSI642SD,
17231 IX86_BUILTIN_CVTSD2SI,
17232 IX86_BUILTIN_CVTSD2SI64,
17233 IX86_BUILTIN_CVTSD2SS,
17234 IX86_BUILTIN_CVTSS2SD,
17235 IX86_BUILTIN_CVTTSD2SI,
17236 IX86_BUILTIN_CVTTSD2SI64,
17238 IX86_BUILTIN_CVTPS2DQ,
17239 IX86_BUILTIN_CVTPS2PD,
17240 IX86_BUILTIN_CVTTPS2DQ,
17242 IX86_BUILTIN_MOVNTI,
17243 IX86_BUILTIN_MOVNTPD,
17244 IX86_BUILTIN_MOVNTDQ,
17247 IX86_BUILTIN_MASKMOVDQU,
17248 IX86_BUILTIN_MOVMSKPD,
17249 IX86_BUILTIN_PMOVMSKB128,
17251 IX86_BUILTIN_PACKSSWB128,
17252 IX86_BUILTIN_PACKSSDW128,
17253 IX86_BUILTIN_PACKUSWB128,
17255 IX86_BUILTIN_PADDB128,
17256 IX86_BUILTIN_PADDW128,
17257 IX86_BUILTIN_PADDD128,
17258 IX86_BUILTIN_PADDQ128,
17259 IX86_BUILTIN_PADDSB128,
17260 IX86_BUILTIN_PADDSW128,
17261 IX86_BUILTIN_PADDUSB128,
17262 IX86_BUILTIN_PADDUSW128,
17263 IX86_BUILTIN_PSUBB128,
17264 IX86_BUILTIN_PSUBW128,
17265 IX86_BUILTIN_PSUBD128,
17266 IX86_BUILTIN_PSUBQ128,
17267 IX86_BUILTIN_PSUBSB128,
17268 IX86_BUILTIN_PSUBSW128,
17269 IX86_BUILTIN_PSUBUSB128,
17270 IX86_BUILTIN_PSUBUSW128,
17272 IX86_BUILTIN_PAND128,
17273 IX86_BUILTIN_PANDN128,
17274 IX86_BUILTIN_POR128,
17275 IX86_BUILTIN_PXOR128,
17277 IX86_BUILTIN_PAVGB128,
17278 IX86_BUILTIN_PAVGW128,
17280 IX86_BUILTIN_PCMPEQB128,
17281 IX86_BUILTIN_PCMPEQW128,
17282 IX86_BUILTIN_PCMPEQD128,
17283 IX86_BUILTIN_PCMPGTB128,
17284 IX86_BUILTIN_PCMPGTW128,
17285 IX86_BUILTIN_PCMPGTD128,
17287 IX86_BUILTIN_PMADDWD128,
17289 IX86_BUILTIN_PMAXSW128,
17290 IX86_BUILTIN_PMAXUB128,
17291 IX86_BUILTIN_PMINSW128,
17292 IX86_BUILTIN_PMINUB128,
17294 IX86_BUILTIN_PMULUDQ,
17295 IX86_BUILTIN_PMULUDQ128,
17296 IX86_BUILTIN_PMULHUW128,
17297 IX86_BUILTIN_PMULHW128,
17298 IX86_BUILTIN_PMULLW128,
17300 IX86_BUILTIN_PSADBW128,
17301 IX86_BUILTIN_PSHUFHW,
17302 IX86_BUILTIN_PSHUFLW,
17303 IX86_BUILTIN_PSHUFD,
17305 IX86_BUILTIN_PSLLDQI128,
17306 IX86_BUILTIN_PSLLWI128,
17307 IX86_BUILTIN_PSLLDI128,
17308 IX86_BUILTIN_PSLLQI128,
17309 IX86_BUILTIN_PSRAWI128,
17310 IX86_BUILTIN_PSRADI128,
17311 IX86_BUILTIN_PSRLDQI128,
17312 IX86_BUILTIN_PSRLWI128,
17313 IX86_BUILTIN_PSRLDI128,
17314 IX86_BUILTIN_PSRLQI128,
17316 IX86_BUILTIN_PSLLDQ128,
17317 IX86_BUILTIN_PSLLW128,
17318 IX86_BUILTIN_PSLLD128,
17319 IX86_BUILTIN_PSLLQ128,
17320 IX86_BUILTIN_PSRAW128,
17321 IX86_BUILTIN_PSRAD128,
17322 IX86_BUILTIN_PSRLW128,
17323 IX86_BUILTIN_PSRLD128,
17324 IX86_BUILTIN_PSRLQ128,
17326 IX86_BUILTIN_PUNPCKHBW128,
17327 IX86_BUILTIN_PUNPCKHWD128,
17328 IX86_BUILTIN_PUNPCKHDQ128,
17329 IX86_BUILTIN_PUNPCKHQDQ128,
17330 IX86_BUILTIN_PUNPCKLBW128,
17331 IX86_BUILTIN_PUNPCKLWD128,
17332 IX86_BUILTIN_PUNPCKLDQ128,
17333 IX86_BUILTIN_PUNPCKLQDQ128,
17335 IX86_BUILTIN_CLFLUSH,
17336 IX86_BUILTIN_MFENCE,
17337 IX86_BUILTIN_LFENCE,
17339 /* Prescott New Instructions. */
17340 IX86_BUILTIN_ADDSUBPS,
17341 IX86_BUILTIN_HADDPS,
17342 IX86_BUILTIN_HSUBPS,
17343 IX86_BUILTIN_MOVSHDUP,
17344 IX86_BUILTIN_MOVSLDUP,
17345 IX86_BUILTIN_ADDSUBPD,
17346 IX86_BUILTIN_HADDPD,
17347 IX86_BUILTIN_HSUBPD,
17348 IX86_BUILTIN_LDDQU,
17350 IX86_BUILTIN_MONITOR,
17351 IX86_BUILTIN_MWAIT,
17354 IX86_BUILTIN_PHADDW,
17355 IX86_BUILTIN_PHADDD,
17356 IX86_BUILTIN_PHADDSW,
17357 IX86_BUILTIN_PHSUBW,
17358 IX86_BUILTIN_PHSUBD,
17359 IX86_BUILTIN_PHSUBSW,
17360 IX86_BUILTIN_PMADDUBSW,
17361 IX86_BUILTIN_PMULHRSW,
17362 IX86_BUILTIN_PSHUFB,
17363 IX86_BUILTIN_PSIGNB,
17364 IX86_BUILTIN_PSIGNW,
17365 IX86_BUILTIN_PSIGND,
17366 IX86_BUILTIN_PALIGNR,
17367 IX86_BUILTIN_PABSB,
17368 IX86_BUILTIN_PABSW,
17369 IX86_BUILTIN_PABSD,
17371 IX86_BUILTIN_PHADDW128,
17372 IX86_BUILTIN_PHADDD128,
17373 IX86_BUILTIN_PHADDSW128,
17374 IX86_BUILTIN_PHSUBW128,
17375 IX86_BUILTIN_PHSUBD128,
17376 IX86_BUILTIN_PHSUBSW128,
17377 IX86_BUILTIN_PMADDUBSW128,
17378 IX86_BUILTIN_PMULHRSW128,
17379 IX86_BUILTIN_PSHUFB128,
17380 IX86_BUILTIN_PSIGNB128,
17381 IX86_BUILTIN_PSIGNW128,
17382 IX86_BUILTIN_PSIGND128,
17383 IX86_BUILTIN_PALIGNR128,
17384 IX86_BUILTIN_PABSB128,
17385 IX86_BUILTIN_PABSW128,
17386 IX86_BUILTIN_PABSD128,
17388 /* AMDFAM10 - SSE4A New Instructions. */
17389 IX86_BUILTIN_MOVNTSD,
17390 IX86_BUILTIN_MOVNTSS,
17391 IX86_BUILTIN_EXTRQI,
17392 IX86_BUILTIN_EXTRQ,
17393 IX86_BUILTIN_INSERTQI,
17394 IX86_BUILTIN_INSERTQ,
17397 IX86_BUILTIN_BLENDPD,
17398 IX86_BUILTIN_BLENDPS,
17399 IX86_BUILTIN_BLENDVPD,
17400 IX86_BUILTIN_BLENDVPS,
17401 IX86_BUILTIN_PBLENDVB128,
17402 IX86_BUILTIN_PBLENDW128,
17407 IX86_BUILTIN_INSERTPS128,
17409 IX86_BUILTIN_MOVNTDQA,
17410 IX86_BUILTIN_MPSADBW128,
17411 IX86_BUILTIN_PACKUSDW128,
17412 IX86_BUILTIN_PCMPEQQ,
17413 IX86_BUILTIN_PHMINPOSUW128,
17415 IX86_BUILTIN_PMAXSB128,
17416 IX86_BUILTIN_PMAXSD128,
17417 IX86_BUILTIN_PMAXUD128,
17418 IX86_BUILTIN_PMAXUW128,
17420 IX86_BUILTIN_PMINSB128,
17421 IX86_BUILTIN_PMINSD128,
17422 IX86_BUILTIN_PMINUD128,
17423 IX86_BUILTIN_PMINUW128,
17425 IX86_BUILTIN_PMOVSXBW128,
17426 IX86_BUILTIN_PMOVSXBD128,
17427 IX86_BUILTIN_PMOVSXBQ128,
17428 IX86_BUILTIN_PMOVSXWD128,
17429 IX86_BUILTIN_PMOVSXWQ128,
17430 IX86_BUILTIN_PMOVSXDQ128,
17432 IX86_BUILTIN_PMOVZXBW128,
17433 IX86_BUILTIN_PMOVZXBD128,
17434 IX86_BUILTIN_PMOVZXBQ128,
17435 IX86_BUILTIN_PMOVZXWD128,
17436 IX86_BUILTIN_PMOVZXWQ128,
17437 IX86_BUILTIN_PMOVZXDQ128,
17439 IX86_BUILTIN_PMULDQ128,
17440 IX86_BUILTIN_PMULLD128,
17442 IX86_BUILTIN_ROUNDPD,
17443 IX86_BUILTIN_ROUNDPS,
17444 IX86_BUILTIN_ROUNDSD,
17445 IX86_BUILTIN_ROUNDSS,
17447 IX86_BUILTIN_PTESTZ,
17448 IX86_BUILTIN_PTESTC,
17449 IX86_BUILTIN_PTESTNZC,
17451 IX86_BUILTIN_VEC_INIT_V2SI,
17452 IX86_BUILTIN_VEC_INIT_V4HI,
17453 IX86_BUILTIN_VEC_INIT_V8QI,
17454 IX86_BUILTIN_VEC_EXT_V2DF,
17455 IX86_BUILTIN_VEC_EXT_V2DI,
17456 IX86_BUILTIN_VEC_EXT_V4SF,
17457 IX86_BUILTIN_VEC_EXT_V4SI,
17458 IX86_BUILTIN_VEC_EXT_V8HI,
17459 IX86_BUILTIN_VEC_EXT_V2SI,
17460 IX86_BUILTIN_VEC_EXT_V4HI,
17461 IX86_BUILTIN_VEC_EXT_V16QI,
17462 IX86_BUILTIN_VEC_SET_V2DI,
17463 IX86_BUILTIN_VEC_SET_V4SF,
17464 IX86_BUILTIN_VEC_SET_V4SI,
17465 IX86_BUILTIN_VEC_SET_V8HI,
17466 IX86_BUILTIN_VEC_SET_V4HI,
17467 IX86_BUILTIN_VEC_SET_V16QI,
17469 IX86_BUILTIN_VEC_PACK_SFIX,
17472 IX86_BUILTIN_CRC32QI,
17473 IX86_BUILTIN_CRC32HI,
17474 IX86_BUILTIN_CRC32SI,
17475 IX86_BUILTIN_CRC32DI,
17477 IX86_BUILTIN_PCMPESTRI128,
17478 IX86_BUILTIN_PCMPESTRM128,
17479 IX86_BUILTIN_PCMPESTRA128,
17480 IX86_BUILTIN_PCMPESTRC128,
17481 IX86_BUILTIN_PCMPESTRO128,
17482 IX86_BUILTIN_PCMPESTRS128,
17483 IX86_BUILTIN_PCMPESTRZ128,
17484 IX86_BUILTIN_PCMPISTRI128,
17485 IX86_BUILTIN_PCMPISTRM128,
17486 IX86_BUILTIN_PCMPISTRA128,
17487 IX86_BUILTIN_PCMPISTRC128,
17488 IX86_BUILTIN_PCMPISTRO128,
17489 IX86_BUILTIN_PCMPISTRS128,
17490 IX86_BUILTIN_PCMPISTRZ128,
17492 IX86_BUILTIN_PCMPGTQ,
17494 /* TFmode support builtins. */
17496 IX86_BUILTIN_FABSQ,
17497 IX86_BUILTIN_COPYSIGNQ,
17499 /* SSE5 instructions */
17500 IX86_BUILTIN_FMADDSS,
17501 IX86_BUILTIN_FMADDSD,
17502 IX86_BUILTIN_FMADDPS,
17503 IX86_BUILTIN_FMADDPD,
17504 IX86_BUILTIN_FMSUBSS,
17505 IX86_BUILTIN_FMSUBSD,
17506 IX86_BUILTIN_FMSUBPS,
17507 IX86_BUILTIN_FMSUBPD,
17508 IX86_BUILTIN_FNMADDSS,
17509 IX86_BUILTIN_FNMADDSD,
17510 IX86_BUILTIN_FNMADDPS,
17511 IX86_BUILTIN_FNMADDPD,
17512 IX86_BUILTIN_FNMSUBSS,
17513 IX86_BUILTIN_FNMSUBSD,
17514 IX86_BUILTIN_FNMSUBPS,
17515 IX86_BUILTIN_FNMSUBPD,
17516 IX86_BUILTIN_PCMOV_V2DI,
17517 IX86_BUILTIN_PCMOV_V4SI,
17518 IX86_BUILTIN_PCMOV_V8HI,
17519 IX86_BUILTIN_PCMOV_V16QI,
17520 IX86_BUILTIN_PCMOV_V4SF,
17521 IX86_BUILTIN_PCMOV_V2DF,
17522 IX86_BUILTIN_PPERM,
17523 IX86_BUILTIN_PERMPS,
17524 IX86_BUILTIN_PERMPD,
17525 IX86_BUILTIN_PMACSSWW,
17526 IX86_BUILTIN_PMACSWW,
17527 IX86_BUILTIN_PMACSSWD,
17528 IX86_BUILTIN_PMACSWD,
17529 IX86_BUILTIN_PMACSSDD,
17530 IX86_BUILTIN_PMACSDD,
17531 IX86_BUILTIN_PMACSSDQL,
17532 IX86_BUILTIN_PMACSSDQH,
17533 IX86_BUILTIN_PMACSDQL,
17534 IX86_BUILTIN_PMACSDQH,
17535 IX86_BUILTIN_PMADCSSWD,
17536 IX86_BUILTIN_PMADCSWD,
17537 IX86_BUILTIN_PHADDBW,
17538 IX86_BUILTIN_PHADDBD,
17539 IX86_BUILTIN_PHADDBQ,
17540 IX86_BUILTIN_PHADDWD,
17541 IX86_BUILTIN_PHADDWQ,
17542 IX86_BUILTIN_PHADDDQ,
17543 IX86_BUILTIN_PHADDUBW,
17544 IX86_BUILTIN_PHADDUBD,
17545 IX86_BUILTIN_PHADDUBQ,
17546 IX86_BUILTIN_PHADDUWD,
17547 IX86_BUILTIN_PHADDUWQ,
17548 IX86_BUILTIN_PHADDUDQ,
17549 IX86_BUILTIN_PHSUBBW,
17550 IX86_BUILTIN_PHSUBWD,
17551 IX86_BUILTIN_PHSUBDQ,
17552 IX86_BUILTIN_PROTB,
17553 IX86_BUILTIN_PROTW,
17554 IX86_BUILTIN_PROTD,
17555 IX86_BUILTIN_PROTQ,
17556 IX86_BUILTIN_PROTB_IMM,
17557 IX86_BUILTIN_PROTW_IMM,
17558 IX86_BUILTIN_PROTD_IMM,
17559 IX86_BUILTIN_PROTQ_IMM,
17560 IX86_BUILTIN_PSHLB,
17561 IX86_BUILTIN_PSHLW,
17562 IX86_BUILTIN_PSHLD,
17563 IX86_BUILTIN_PSHLQ,
17564 IX86_BUILTIN_PSHAB,
17565 IX86_BUILTIN_PSHAW,
17566 IX86_BUILTIN_PSHAD,
17567 IX86_BUILTIN_PSHAQ,
17568 IX86_BUILTIN_FRCZSS,
17569 IX86_BUILTIN_FRCZSD,
17570 IX86_BUILTIN_FRCZPS,
17571 IX86_BUILTIN_FRCZPD,
17572 IX86_BUILTIN_CVTPH2PS,
17573 IX86_BUILTIN_CVTPS2PH,
17575 IX86_BUILTIN_COMEQSS,
17576 IX86_BUILTIN_COMNESS,
17577 IX86_BUILTIN_COMLTSS,
17578 IX86_BUILTIN_COMLESS,
17579 IX86_BUILTIN_COMGTSS,
17580 IX86_BUILTIN_COMGESS,
17581 IX86_BUILTIN_COMUEQSS,
17582 IX86_BUILTIN_COMUNESS,
17583 IX86_BUILTIN_COMULTSS,
17584 IX86_BUILTIN_COMULESS,
17585 IX86_BUILTIN_COMUGTSS,
17586 IX86_BUILTIN_COMUGESS,
17587 IX86_BUILTIN_COMORDSS,
17588 IX86_BUILTIN_COMUNORDSS,
17589 IX86_BUILTIN_COMFALSESS,
17590 IX86_BUILTIN_COMTRUESS,
17592 IX86_BUILTIN_COMEQSD,
17593 IX86_BUILTIN_COMNESD,
17594 IX86_BUILTIN_COMLTSD,
17595 IX86_BUILTIN_COMLESD,
17596 IX86_BUILTIN_COMGTSD,
17597 IX86_BUILTIN_COMGESD,
17598 IX86_BUILTIN_COMUEQSD,
17599 IX86_BUILTIN_COMUNESD,
17600 IX86_BUILTIN_COMULTSD,
17601 IX86_BUILTIN_COMULESD,
17602 IX86_BUILTIN_COMUGTSD,
17603 IX86_BUILTIN_COMUGESD,
17604 IX86_BUILTIN_COMORDSD,
17605 IX86_BUILTIN_COMUNORDSD,
17606 IX86_BUILTIN_COMFALSESD,
17607 IX86_BUILTIN_COMTRUESD,
17609 IX86_BUILTIN_COMEQPS,
17610 IX86_BUILTIN_COMNEPS,
17611 IX86_BUILTIN_COMLTPS,
17612 IX86_BUILTIN_COMLEPS,
17613 IX86_BUILTIN_COMGTPS,
17614 IX86_BUILTIN_COMGEPS,
17615 IX86_BUILTIN_COMUEQPS,
17616 IX86_BUILTIN_COMUNEPS,
17617 IX86_BUILTIN_COMULTPS,
17618 IX86_BUILTIN_COMULEPS,
17619 IX86_BUILTIN_COMUGTPS,
17620 IX86_BUILTIN_COMUGEPS,
17621 IX86_BUILTIN_COMORDPS,
17622 IX86_BUILTIN_COMUNORDPS,
17623 IX86_BUILTIN_COMFALSEPS,
17624 IX86_BUILTIN_COMTRUEPS,
17626 IX86_BUILTIN_COMEQPD,
17627 IX86_BUILTIN_COMNEPD,
17628 IX86_BUILTIN_COMLTPD,
17629 IX86_BUILTIN_COMLEPD,
17630 IX86_BUILTIN_COMGTPD,
17631 IX86_BUILTIN_COMGEPD,
17632 IX86_BUILTIN_COMUEQPD,
17633 IX86_BUILTIN_COMUNEPD,
17634 IX86_BUILTIN_COMULTPD,
17635 IX86_BUILTIN_COMULEPD,
17636 IX86_BUILTIN_COMUGTPD,
17637 IX86_BUILTIN_COMUGEPD,
17638 IX86_BUILTIN_COMORDPD,
17639 IX86_BUILTIN_COMUNORDPD,
17640 IX86_BUILTIN_COMFALSEPD,
17641 IX86_BUILTIN_COMTRUEPD,
17643 IX86_BUILTIN_PCOMEQUB,
17644 IX86_BUILTIN_PCOMNEUB,
17645 IX86_BUILTIN_PCOMLTUB,
17646 IX86_BUILTIN_PCOMLEUB,
17647 IX86_BUILTIN_PCOMGTUB,
17648 IX86_BUILTIN_PCOMGEUB,
17649 IX86_BUILTIN_PCOMFALSEUB,
17650 IX86_BUILTIN_PCOMTRUEUB,
17651 IX86_BUILTIN_PCOMEQUW,
17652 IX86_BUILTIN_PCOMNEUW,
17653 IX86_BUILTIN_PCOMLTUW,
17654 IX86_BUILTIN_PCOMLEUW,
17655 IX86_BUILTIN_PCOMGTUW,
17656 IX86_BUILTIN_PCOMGEUW,
17657 IX86_BUILTIN_PCOMFALSEUW,
17658 IX86_BUILTIN_PCOMTRUEUW,
17659 IX86_BUILTIN_PCOMEQUD,
17660 IX86_BUILTIN_PCOMNEUD,
17661 IX86_BUILTIN_PCOMLTUD,
17662 IX86_BUILTIN_PCOMLEUD,
17663 IX86_BUILTIN_PCOMGTUD,
17664 IX86_BUILTIN_PCOMGEUD,
17665 IX86_BUILTIN_PCOMFALSEUD,
17666 IX86_BUILTIN_PCOMTRUEUD,
17667 IX86_BUILTIN_PCOMEQUQ,
17668 IX86_BUILTIN_PCOMNEUQ,
17669 IX86_BUILTIN_PCOMLTUQ,
17670 IX86_BUILTIN_PCOMLEUQ,
17671 IX86_BUILTIN_PCOMGTUQ,
17672 IX86_BUILTIN_PCOMGEUQ,
17673 IX86_BUILTIN_PCOMFALSEUQ,
17674 IX86_BUILTIN_PCOMTRUEUQ,
17676 IX86_BUILTIN_PCOMEQB,
17677 IX86_BUILTIN_PCOMNEB,
17678 IX86_BUILTIN_PCOMLTB,
17679 IX86_BUILTIN_PCOMLEB,
17680 IX86_BUILTIN_PCOMGTB,
17681 IX86_BUILTIN_PCOMGEB,
17682 IX86_BUILTIN_PCOMFALSEB,
17683 IX86_BUILTIN_PCOMTRUEB,
17684 IX86_BUILTIN_PCOMEQW,
17685 IX86_BUILTIN_PCOMNEW,
17686 IX86_BUILTIN_PCOMLTW,
17687 IX86_BUILTIN_PCOMLEW,
17688 IX86_BUILTIN_PCOMGTW,
17689 IX86_BUILTIN_PCOMGEW,
17690 IX86_BUILTIN_PCOMFALSEW,
17691 IX86_BUILTIN_PCOMTRUEW,
17692 IX86_BUILTIN_PCOMEQD,
17693 IX86_BUILTIN_PCOMNED,
17694 IX86_BUILTIN_PCOMLTD,
17695 IX86_BUILTIN_PCOMLED,
17696 IX86_BUILTIN_PCOMGTD,
17697 IX86_BUILTIN_PCOMGED,
17698 IX86_BUILTIN_PCOMFALSED,
17699 IX86_BUILTIN_PCOMTRUED,
17700 IX86_BUILTIN_PCOMEQQ,
17701 IX86_BUILTIN_PCOMNEQ,
17702 IX86_BUILTIN_PCOMLTQ,
17703 IX86_BUILTIN_PCOMLEQ,
17704 IX86_BUILTIN_PCOMGTQ,
17705 IX86_BUILTIN_PCOMGEQ,
17706 IX86_BUILTIN_PCOMFALSEQ,
17707 IX86_BUILTIN_PCOMTRUEQ,
/* Table for the ix86 builtin decls.  Indexed by builtin code (enum
   ix86_builtins); each slot holds the FUNCTION_DECL registered by
   def_builtin, or NULL_TREE if that builtin was not added for the
   current ISA settings.  GTY(()) roots the decls for the GC.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17715 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17716 * if the target_flags include one of MASK. Stores the function decl
17717 * in the ix86_builtins array.
17718 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17721 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17723 tree decl = NULL_TREE;
17725 if (mask & ix86_isa_flags
17726 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17728 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17730 ix86_builtins[(int) code] = decl;
17736 /* Like def_builtin, but also marks the function decl "const". */
17739 def_builtin_const (int mask, const char *name, tree type,
17740 enum ix86_builtins code)
17742 tree decl = def_builtin (mask, name, type, code);
17744 TREE_READONLY (decl) = 1;
17748 /* Bits for builtin_description.flag. */
17750 /* Set when we don't support the comparison natively, and should
17751 swap_comparison in order to support it. */
17752 #define BUILTIN_DESC_SWAP_OPERANDS 1
17754 struct builtin_description
17756 const unsigned int mask;
17757 const enum insn_code icode;
17758 const char *const name;
17759 const enum ix86_builtins code;
17760 const enum rtx_code comparison;
17764 static const struct builtin_description bdesc_comi[] =
17766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17771 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17775 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17776 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17777 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17789 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17792 static const struct builtin_description bdesc_ptest[] =
17795 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17796 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17797 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17800 static const struct builtin_description bdesc_pcmpestr[] =
17803 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17804 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17805 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17806 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17807 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17808 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17809 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17812 static const struct builtin_description bdesc_pcmpistr[] =
17815 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17816 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17817 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17818 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17819 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17820 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17821 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17824 static const struct builtin_description bdesc_crc32[] =
17827 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17828 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17829 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17830 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17833 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
17834 static const struct builtin_description bdesc_sse_3arg[] =
17837 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17838 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17839 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17840 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17841 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17842 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17843 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17844 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17845 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17846 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17847 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17848 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17851 static const struct builtin_description bdesc_2arg[] =
17854 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17855 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17856 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17859 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17860 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17861 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17863 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17864 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17865 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17866 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17881 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17884 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17886 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17887 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17891 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17893 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17894 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17903 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17904 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17905 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17906 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17907 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17908 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17909 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17910 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17912 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17913 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17914 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17915 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17917 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17918 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17919 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17921 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17922 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17923 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17925 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17926 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17927 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17930 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17931 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17933 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17934 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17940 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17941 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17942 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17943 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17949 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17959 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17961 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17969 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17980 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17984 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17985 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17986 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17987 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
18000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
18001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
18002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
18005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
18006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
18007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
18008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
18009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
18010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
18011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
18012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
18014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
18015 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
18016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
18017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
18019 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
18020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
18021 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
18022 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
18024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
18025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
18026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
18028 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
18031 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
18032 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
18033 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
18034 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
18035 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
18036 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
18037 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
18038 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
18040 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
18041 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
18042 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
18043 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
18044 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
18045 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
18046 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
18047 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
18049 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
18050 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
18052 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
18053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
18054 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
18055 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
18057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
18058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
18060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
18061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
18062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
18063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
18064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
18065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
18067 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
18068 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
18069 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
18070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
18072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
18073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
18074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
18075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
18076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
18077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
18078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
18081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
18082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
18083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
18085 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
18086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
18088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
18089 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
18091 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
18092 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
18093 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
18095 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
18096 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
18097 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
18099 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
18100 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
18102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
18104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
18105 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
18106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
18107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
18110 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
18111 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
18112 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
18113 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
18114 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
18115 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
18118 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
18119 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
18120 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
18121 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
18122 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
18123 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
18124 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
18125 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
18126 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
18127 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
18128 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
18129 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18130 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18131 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18132 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18133 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18134 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18135 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18136 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18137 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18138 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18139 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18140 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18141 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18144 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18145 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18146 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18147 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18148 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18149 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18150 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18151 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18152 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18153 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18154 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18155 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18158 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18161 static const struct builtin_description bdesc_1arg[] =
18163 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18164 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18166 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18167 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 },
18168 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18169 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 },
18170 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18172 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18174 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18175 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18176 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18177 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18197 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18198 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18205 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18206 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18209 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18210 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18211 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18212 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18213 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18214 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18217 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18218 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18219 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18220 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18221 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18222 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18223 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18224 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18225 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18226 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18227 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18228 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18229 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18231 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
18232 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18233 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
18237 enum multi_arg_type {
18247 MULTI_ARG_3_PERMPS,
18248 MULTI_ARG_3_PERMPD,
18255 MULTI_ARG_2_DI_IMM,
18256 MULTI_ARG_2_SI_IMM,
18257 MULTI_ARG_2_HI_IMM,
18258 MULTI_ARG_2_QI_IMM,
18259 MULTI_ARG_2_SF_CMP,
18260 MULTI_ARG_2_DF_CMP,
18261 MULTI_ARG_2_DI_CMP,
18262 MULTI_ARG_2_SI_CMP,
18263 MULTI_ARG_2_HI_CMP,
18264 MULTI_ARG_2_QI_CMP,
18287 static const struct builtin_description bdesc_multi_arg[] =
18289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18331 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18332 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18333 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18334 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18353 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18477 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18497 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18498 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18499 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18502 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18517 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18525 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18526 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18529 ix86_init_mmx_sse_builtins (void)
18531 const struct builtin_description * d;
18534 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18535 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18536 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18537 tree V2DI_type_node
18538 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18539 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18540 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18541 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18542 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18543 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18544 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18546 tree pchar_type_node = build_pointer_type (char_type_node);
18547 tree pcchar_type_node = build_pointer_type (
18548 build_type_variant (char_type_node, 1, 0));
18549 tree pfloat_type_node = build_pointer_type (float_type_node);
18550 tree pcfloat_type_node = build_pointer_type (
18551 build_type_variant (float_type_node, 1, 0));
18552 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18553 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18554 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18557 tree int_ftype_v4sf_v4sf
18558 = build_function_type_list (integer_type_node,
18559 V4SF_type_node, V4SF_type_node, NULL_TREE);
18560 tree v4si_ftype_v4sf_v4sf
18561 = build_function_type_list (V4SI_type_node,
18562 V4SF_type_node, V4SF_type_node, NULL_TREE);
18563 /* MMX/SSE/integer conversions. */
18564 tree int_ftype_v4sf
18565 = build_function_type_list (integer_type_node,
18566 V4SF_type_node, NULL_TREE);
18567 tree int64_ftype_v4sf
18568 = build_function_type_list (long_long_integer_type_node,
18569 V4SF_type_node, NULL_TREE);
18570 tree int_ftype_v8qi
18571 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18572 tree v4sf_ftype_v4sf_int
18573 = build_function_type_list (V4SF_type_node,
18574 V4SF_type_node, integer_type_node, NULL_TREE);
18575 tree v4sf_ftype_v4sf_int64
18576 = build_function_type_list (V4SF_type_node,
18577 V4SF_type_node, long_long_integer_type_node,
18579 tree v4sf_ftype_v4sf_v2si
18580 = build_function_type_list (V4SF_type_node,
18581 V4SF_type_node, V2SI_type_node, NULL_TREE);
18583 /* Miscellaneous. */
18584 tree v8qi_ftype_v4hi_v4hi
18585 = build_function_type_list (V8QI_type_node,
18586 V4HI_type_node, V4HI_type_node, NULL_TREE);
18587 tree v4hi_ftype_v2si_v2si
18588 = build_function_type_list (V4HI_type_node,
18589 V2SI_type_node, V2SI_type_node, NULL_TREE);
18590 tree v4sf_ftype_v4sf_v4sf_int
18591 = build_function_type_list (V4SF_type_node,
18592 V4SF_type_node, V4SF_type_node,
18593 integer_type_node, NULL_TREE);
18594 tree v2si_ftype_v4hi_v4hi
18595 = build_function_type_list (V2SI_type_node,
18596 V4HI_type_node, V4HI_type_node, NULL_TREE);
18597 tree v4hi_ftype_v4hi_int
18598 = build_function_type_list (V4HI_type_node,
18599 V4HI_type_node, integer_type_node, NULL_TREE);
18600 tree v4hi_ftype_v4hi_di
18601 = build_function_type_list (V4HI_type_node,
18602 V4HI_type_node, long_long_unsigned_type_node,
18604 tree v2si_ftype_v2si_di
18605 = build_function_type_list (V2SI_type_node,
18606 V2SI_type_node, long_long_unsigned_type_node,
18608 tree void_ftype_void
18609 = build_function_type (void_type_node, void_list_node);
18610 tree void_ftype_unsigned
18611 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18612 tree void_ftype_unsigned_unsigned
18613 = build_function_type_list (void_type_node, unsigned_type_node,
18614 unsigned_type_node, NULL_TREE);
18615 tree void_ftype_pcvoid_unsigned_unsigned
18616 = build_function_type_list (void_type_node, const_ptr_type_node,
18617 unsigned_type_node, unsigned_type_node,
18619 tree unsigned_ftype_void
18620 = build_function_type (unsigned_type_node, void_list_node);
18621 tree v2si_ftype_v4sf
18622 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18623 /* Loads/stores. */
18624 tree void_ftype_v8qi_v8qi_pchar
18625 = build_function_type_list (void_type_node,
18626 V8QI_type_node, V8QI_type_node,
18627 pchar_type_node, NULL_TREE);
18628 tree v4sf_ftype_pcfloat
18629 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18630 /* @@@ the type is bogus */
18631 tree v4sf_ftype_v4sf_pv2si
18632 = build_function_type_list (V4SF_type_node,
18633 V4SF_type_node, pv2si_type_node, NULL_TREE);
18634 tree void_ftype_pv2si_v4sf
18635 = build_function_type_list (void_type_node,
18636 pv2si_type_node, V4SF_type_node, NULL_TREE);
18637 tree void_ftype_pfloat_v4sf
18638 = build_function_type_list (void_type_node,
18639 pfloat_type_node, V4SF_type_node, NULL_TREE);
18640 tree void_ftype_pdi_di
18641 = build_function_type_list (void_type_node,
18642 pdi_type_node, long_long_unsigned_type_node,
18644 tree void_ftype_pv2di_v2di
18645 = build_function_type_list (void_type_node,
18646 pv2di_type_node, V2DI_type_node, NULL_TREE);
18647 /* Normal vector unops. */
18648 tree v4sf_ftype_v4sf
18649 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18650 tree v16qi_ftype_v16qi
18651 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18652 tree v8hi_ftype_v8hi
18653 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18654 tree v4si_ftype_v4si
18655 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18656 tree v8qi_ftype_v8qi
18657 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18658 tree v4hi_ftype_v4hi
18659 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18661 /* Normal vector binops. */
18662 tree v4sf_ftype_v4sf_v4sf
18663 = build_function_type_list (V4SF_type_node,
18664 V4SF_type_node, V4SF_type_node, NULL_TREE);
18665 tree v8qi_ftype_v8qi_v8qi
18666 = build_function_type_list (V8QI_type_node,
18667 V8QI_type_node, V8QI_type_node, NULL_TREE);
18668 tree v4hi_ftype_v4hi_v4hi
18669 = build_function_type_list (V4HI_type_node,
18670 V4HI_type_node, V4HI_type_node, NULL_TREE);
18671 tree v2si_ftype_v2si_v2si
18672 = build_function_type_list (V2SI_type_node,
18673 V2SI_type_node, V2SI_type_node, NULL_TREE);
18674 tree di_ftype_di_di
18675 = build_function_type_list (long_long_unsigned_type_node,
18676 long_long_unsigned_type_node,
18677 long_long_unsigned_type_node, NULL_TREE);
18679 tree di_ftype_di_di_int
18680 = build_function_type_list (long_long_unsigned_type_node,
18681 long_long_unsigned_type_node,
18682 long_long_unsigned_type_node,
18683 integer_type_node, NULL_TREE);
18685 tree v2si_ftype_v2sf
18686 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18687 tree v2sf_ftype_v2si
18688 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18689 tree v2si_ftype_v2si
18690 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18691 tree v2sf_ftype_v2sf
18692 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18693 tree v2sf_ftype_v2sf_v2sf
18694 = build_function_type_list (V2SF_type_node,
18695 V2SF_type_node, V2SF_type_node, NULL_TREE);
18696 tree v2si_ftype_v2sf_v2sf
18697 = build_function_type_list (V2SI_type_node,
18698 V2SF_type_node, V2SF_type_node, NULL_TREE);
18699 tree pint_type_node = build_pointer_type (integer_type_node);
18700 tree pdouble_type_node = build_pointer_type (double_type_node);
18701 tree pcdouble_type_node = build_pointer_type (
18702 build_type_variant (double_type_node, 1, 0));
18703 tree int_ftype_v2df_v2df
18704 = build_function_type_list (integer_type_node,
18705 V2DF_type_node, V2DF_type_node, NULL_TREE);
18707 tree void_ftype_pcvoid
18708 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18709 tree v4sf_ftype_v4si
18710 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18711 tree v4si_ftype_v4sf
18712 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18713 tree v2df_ftype_v4si
18714 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18715 tree v4si_ftype_v2df
18716 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18717 tree v4si_ftype_v2df_v2df
18718 = build_function_type_list (V4SI_type_node,
18719 V2DF_type_node, V2DF_type_node, NULL_TREE);
18720 tree v2si_ftype_v2df
18721 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18722 tree v4sf_ftype_v2df
18723 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18724 tree v2df_ftype_v2si
18725 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18726 tree v2df_ftype_v4sf
18727 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18728 tree int_ftype_v2df
18729 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18730 tree int64_ftype_v2df
18731 = build_function_type_list (long_long_integer_type_node,
18732 V2DF_type_node, NULL_TREE);
18733 tree v2df_ftype_v2df_int
18734 = build_function_type_list (V2DF_type_node,
18735 V2DF_type_node, integer_type_node, NULL_TREE);
18736 tree v2df_ftype_v2df_int64
18737 = build_function_type_list (V2DF_type_node,
18738 V2DF_type_node, long_long_integer_type_node,
18740 tree v4sf_ftype_v4sf_v2df
18741 = build_function_type_list (V4SF_type_node,
18742 V4SF_type_node, V2DF_type_node, NULL_TREE);
18743 tree v2df_ftype_v2df_v4sf
18744 = build_function_type_list (V2DF_type_node,
18745 V2DF_type_node, V4SF_type_node, NULL_TREE);
18746 tree v2df_ftype_v2df_v2df_int
18747 = build_function_type_list (V2DF_type_node,
18748 V2DF_type_node, V2DF_type_node,
18751 tree v2df_ftype_v2df_pcdouble
18752 = build_function_type_list (V2DF_type_node,
18753 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18754 tree void_ftype_pdouble_v2df
18755 = build_function_type_list (void_type_node,
18756 pdouble_type_node, V2DF_type_node, NULL_TREE);
18757 tree void_ftype_pint_int
18758 = build_function_type_list (void_type_node,
18759 pint_type_node, integer_type_node, NULL_TREE);
18760 tree void_ftype_v16qi_v16qi_pchar
18761 = build_function_type_list (void_type_node,
18762 V16QI_type_node, V16QI_type_node,
18763 pchar_type_node, NULL_TREE);
18764 tree v2df_ftype_pcdouble
18765 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18766 tree v2df_ftype_v2df_v2df
18767 = build_function_type_list (V2DF_type_node,
18768 V2DF_type_node, V2DF_type_node, NULL_TREE);
18769 tree v16qi_ftype_v16qi_v16qi
18770 = build_function_type_list (V16QI_type_node,
18771 V16QI_type_node, V16QI_type_node, NULL_TREE);
18772 tree v8hi_ftype_v8hi_v8hi
18773 = build_function_type_list (V8HI_type_node,
18774 V8HI_type_node, V8HI_type_node, NULL_TREE);
18775 tree v4si_ftype_v4si_v4si
18776 = build_function_type_list (V4SI_type_node,
18777 V4SI_type_node, V4SI_type_node, NULL_TREE);
18778 tree v2di_ftype_v2di_v2di
18779 = build_function_type_list (V2DI_type_node,
18780 V2DI_type_node, V2DI_type_node, NULL_TREE);
18781 tree v2di_ftype_v2df_v2df
18782 = build_function_type_list (V2DI_type_node,
18783 V2DF_type_node, V2DF_type_node, NULL_TREE);
18784 tree v2df_ftype_v2df
18785 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18786 tree v2di_ftype_v2di_int
18787 = build_function_type_list (V2DI_type_node,
18788 V2DI_type_node, integer_type_node, NULL_TREE);
18789 tree v2di_ftype_v2di_v2di_int
18790 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18791 V2DI_type_node, integer_type_node, NULL_TREE);
18792 tree v4si_ftype_v4si_int
18793 = build_function_type_list (V4SI_type_node,
18794 V4SI_type_node, integer_type_node, NULL_TREE);
18795 tree v8hi_ftype_v8hi_int
18796 = build_function_type_list (V8HI_type_node,
18797 V8HI_type_node, integer_type_node, NULL_TREE);
18798 tree v4si_ftype_v8hi_v8hi
18799 = build_function_type_list (V4SI_type_node,
18800 V8HI_type_node, V8HI_type_node, NULL_TREE);
18801 tree di_ftype_v8qi_v8qi
18802 = build_function_type_list (long_long_unsigned_type_node,
18803 V8QI_type_node, V8QI_type_node, NULL_TREE);
18804 tree di_ftype_v2si_v2si
18805 = build_function_type_list (long_long_unsigned_type_node,
18806 V2SI_type_node, V2SI_type_node, NULL_TREE);
18807 tree v2di_ftype_v16qi_v16qi
18808 = build_function_type_list (V2DI_type_node,
18809 V16QI_type_node, V16QI_type_node, NULL_TREE);
18810 tree v2di_ftype_v4si_v4si
18811 = build_function_type_list (V2DI_type_node,
18812 V4SI_type_node, V4SI_type_node, NULL_TREE);
18813 tree int_ftype_v16qi
18814 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18815 tree v16qi_ftype_pcchar
18816 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18817 tree void_ftype_pchar_v16qi
18818 = build_function_type_list (void_type_node,
18819 pchar_type_node, V16QI_type_node, NULL_TREE);
18821 tree v2di_ftype_v2di_unsigned_unsigned
18822 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18823 unsigned_type_node, unsigned_type_node,
18825 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18826 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18827 unsigned_type_node, unsigned_type_node,
18829 tree v2di_ftype_v2di_v16qi
18830 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18832 tree v2df_ftype_v2df_v2df_v2df
18833 = build_function_type_list (V2DF_type_node,
18834 V2DF_type_node, V2DF_type_node,
18835 V2DF_type_node, NULL_TREE);
18836 tree v4sf_ftype_v4sf_v4sf_v4sf
18837 = build_function_type_list (V4SF_type_node,
18838 V4SF_type_node, V4SF_type_node,
18839 V4SF_type_node, NULL_TREE);
18840 tree v8hi_ftype_v16qi
18841 = build_function_type_list (V8HI_type_node, V16QI_type_node,
18843 tree v4si_ftype_v16qi
18844 = build_function_type_list (V4SI_type_node, V16QI_type_node,
18846 tree v2di_ftype_v16qi
18847 = build_function_type_list (V2DI_type_node, V16QI_type_node,
18849 tree v4si_ftype_v8hi
18850 = build_function_type_list (V4SI_type_node, V8HI_type_node,
18852 tree v2di_ftype_v8hi
18853 = build_function_type_list (V2DI_type_node, V8HI_type_node,
18855 tree v2di_ftype_v4si
18856 = build_function_type_list (V2DI_type_node, V4SI_type_node,
18858 tree v2di_ftype_pv2di
18859 = build_function_type_list (V2DI_type_node, pv2di_type_node,
18861 tree v16qi_ftype_v16qi_v16qi_int
18862 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18863 V16QI_type_node, integer_type_node,
18865 tree v16qi_ftype_v16qi_v16qi_v16qi
18866 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18867 V16QI_type_node, V16QI_type_node,
18869 tree v8hi_ftype_v8hi_v8hi_int
18870 = build_function_type_list (V8HI_type_node, V8HI_type_node,
18871 V8HI_type_node, integer_type_node,
18873 tree v4si_ftype_v4si_v4si_int
18874 = build_function_type_list (V4SI_type_node, V4SI_type_node,
18875 V4SI_type_node, integer_type_node,
18877 tree int_ftype_v2di_v2di
18878 = build_function_type_list (integer_type_node,
18879 V2DI_type_node, V2DI_type_node,
18881 tree int_ftype_v16qi_int_v16qi_int_int
18882 = build_function_type_list (integer_type_node,
18889 tree v16qi_ftype_v16qi_int_v16qi_int_int
18890 = build_function_type_list (V16QI_type_node,
18897 tree int_ftype_v16qi_v16qi_int
18898 = build_function_type_list (integer_type_node,
18904 /* SSE5 instructions */
18905 tree v2di_ftype_v2di_v2di_v2di
18906 = build_function_type_list (V2DI_type_node,
18912 tree v4si_ftype_v4si_v4si_v4si
18913 = build_function_type_list (V4SI_type_node,
18919 tree v4si_ftype_v4si_v4si_v2di
18920 = build_function_type_list (V4SI_type_node,
18926 tree v8hi_ftype_v8hi_v8hi_v8hi
18927 = build_function_type_list (V8HI_type_node,
18933 tree v8hi_ftype_v8hi_v8hi_v4si
18934 = build_function_type_list (V8HI_type_node,
18940 tree v2df_ftype_v2df_v2df_v16qi
18941 = build_function_type_list (V2DF_type_node,
18947 tree v4sf_ftype_v4sf_v4sf_v16qi
18948 = build_function_type_list (V4SF_type_node,
18954 tree v2di_ftype_v2di_si
18955 = build_function_type_list (V2DI_type_node,
18960 tree v4si_ftype_v4si_si
18961 = build_function_type_list (V4SI_type_node,
18966 tree v8hi_ftype_v8hi_si
18967 = build_function_type_list (V8HI_type_node,
18972 tree v16qi_ftype_v16qi_si
18973 = build_function_type_list (V16QI_type_node,
18977 tree v4sf_ftype_v4hi
18978 = build_function_type_list (V4SF_type_node,
18982 tree v4hi_ftype_v4sf
18983 = build_function_type_list (V4HI_type_node,
18987 tree v2di_ftype_v2di
18988 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18992 /* The __float80 type. */
18993 if (TYPE_MODE (long_double_type_node) == XFmode)
18994 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
18998 /* The __float80 type. */
18999 tree float80_type_node = make_node (REAL_TYPE);
19001 TYPE_PRECISION (float80_type_node) = 80;
19002 layout_type (float80_type_node);
19003 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19009 tree float128_type_node = make_node (REAL_TYPE);
19011 TYPE_PRECISION (float128_type_node) = 128;
19012 layout_type (float128_type_node);
19013 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19016 /* TFmode support builtins. */
19017 ftype = build_function_type (float128_type_node,
19019 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19021 ftype = build_function_type_list (float128_type_node,
19022 float128_type_node,
19024 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19026 ftype = build_function_type_list (float128_type_node,
19027 float128_type_node,
19028 float128_type_node,
19030 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19033 /* Add all SSE builtins that are more or less simple operations on
19035 for (i = 0, d = bdesc_sse_3arg;
19036 i < ARRAY_SIZE (bdesc_sse_3arg);
19039 /* Use one of the operands; the target can have a different mode for
19040 mask-generating compares. */
19041 enum machine_mode mode;
19046 mode = insn_data[d->icode].operand[1].mode;
19051 type = v16qi_ftype_v16qi_v16qi_int;
19054 type = v8hi_ftype_v8hi_v8hi_int;
19057 type = v4si_ftype_v4si_v4si_int;
19060 type = v2di_ftype_v2di_v2di_int;
19063 type = v2df_ftype_v2df_v2df_int;
19066 type = v4sf_ftype_v4sf_v4sf_int;
19069 gcc_unreachable ();
19072 /* Override for variable blends. */
19075 case CODE_FOR_sse4_1_blendvpd:
19076 type = v2df_ftype_v2df_v2df_v2df;
19078 case CODE_FOR_sse4_1_blendvps:
19079 type = v4sf_ftype_v4sf_v4sf_v4sf;
19081 case CODE_FOR_sse4_1_pblendvb:
19082 type = v16qi_ftype_v16qi_v16qi_v16qi;
19088 def_builtin_const (d->mask, d->name, type, d->code);
19091 /* Add all builtins that are more or less simple operations on two
19093 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19095 /* Use one of the operands; the target can have a different mode for
19096 mask-generating compares. */
19097 enum machine_mode mode;
19102 mode = insn_data[d->icode].operand[1].mode;
19107 type = v16qi_ftype_v16qi_v16qi;
19110 type = v8hi_ftype_v8hi_v8hi;
19113 type = v4si_ftype_v4si_v4si;
19116 type = v2di_ftype_v2di_v2di;
19119 type = v2df_ftype_v2df_v2df;
19122 type = v4sf_ftype_v4sf_v4sf;
19125 type = v8qi_ftype_v8qi_v8qi;
19128 type = v4hi_ftype_v4hi_v4hi;
19131 type = v2si_ftype_v2si_v2si;
19134 type = di_ftype_di_di;
19138 gcc_unreachable ();
19141 /* Override for comparisons. */
19142 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19143 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19144 type = v4si_ftype_v4sf_v4sf;
19146 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19147 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19148 type = v2di_ftype_v2df_v2df;
19150 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19151 type = v4si_ftype_v2df_v2df;
19153 def_builtin_const (d->mask, d->name, type, d->code);
19156 /* Add all builtins that are more or less simple operations on 1 operand. */
19157 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19159 enum machine_mode mode;
19164 mode = insn_data[d->icode].operand[1].mode;
19169 type = v16qi_ftype_v16qi;
19172 type = v8hi_ftype_v8hi;
19175 type = v4si_ftype_v4si;
19178 type = v2df_ftype_v2df;
19181 type = v4sf_ftype_v4sf;
19184 type = v8qi_ftype_v8qi;
19187 type = v4hi_ftype_v4hi;
19190 type = v2si_ftype_v2si;
19197 def_builtin_const (d->mask, d->name, type, d->code);
19200 /* pcmpestr[im] insns. */
19201 for (i = 0, d = bdesc_pcmpestr;
19202 i < ARRAY_SIZE (bdesc_pcmpestr);
19205 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19206 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19208 ftype = int_ftype_v16qi_int_v16qi_int_int;
19209 def_builtin_const (d->mask, d->name, ftype, d->code);
19212 /* pcmpistr[im] insns. */
19213 for (i = 0, d = bdesc_pcmpistr;
19214 i < ARRAY_SIZE (bdesc_pcmpistr);
19217 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19218 ftype = v16qi_ftype_v16qi_v16qi_int;
19220 ftype = int_ftype_v16qi_v16qi_int;
19221 def_builtin_const (d->mask, d->name, ftype, d->code);
19224 /* Add the remaining MMX insns with somewhat more complicated types. */
19225 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19226 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
19227 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
19228 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
19230 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
19231 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
19232 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
19234 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
19235 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
19237 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19238 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19240 /* comi/ucomi insns. */
19241 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19242 if (d->mask == OPTION_MASK_ISA_SSE2)
19243 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19245 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19248 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19249 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19251 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19252 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19253 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19255 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19256 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19257 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19258 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19259 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19260 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19261 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19262 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19263 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19264 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19265 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19267 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19269 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19270 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19272 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19273 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19274 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19275 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19277 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19278 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19279 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19280 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19282 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19284 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19286 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19287 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19288 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19289 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR);
19290 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19291 ftype = build_function_type_list (float_type_node,
19294 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19295 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19296 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR);
19297 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19299 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
19301 /* Original 3DNow! */
19302 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19303 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19304 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19305 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19306 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19307 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19308 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19309 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19310 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19311 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19312 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19313 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19314 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19315 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19316 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19317 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19318 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19319 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19320 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19321 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19323 /* 3DNow! extension as used in the Athlon CPU. */
19324 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19325 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19326 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19327 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19328 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19329 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19332 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19334 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19335 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19337 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19338 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19340 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19341 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19342 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19343 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19344 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19346 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19347 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19348 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19349 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19351 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19352 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19354 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
19356 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19357 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19359 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19360 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19361 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19362 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19363 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19365 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19367 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19368 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19369 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19370 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19372 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19373 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19374 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19376 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19377 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19378 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19379 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19381 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19382 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19383 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19385 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19386 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19388 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19389 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19391 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19392 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19393 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19394 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19395 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19396 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19397 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19399 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19400 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19401 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19402 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19403 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19404 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19405 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19407 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19408 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19409 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19410 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19412 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19414 /* Prescott New Instructions. */
19415 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19416 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19417 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
19420 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19421 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19424 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19425 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19426 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19427 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19428 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19429 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19430 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19431 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19432 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19433 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19434 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19435 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19436 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19437 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19439 /* SSE4.1 and SSE5 */
19440 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19441 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19442 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
19443 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
19446 ftype = build_function_type_list (unsigned_type_node,
19447 unsigned_type_node,
19448 unsigned_char_type_node,
19450 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19451 ftype = build_function_type_list (unsigned_type_node,
19452 unsigned_type_node,
19453 short_unsigned_type_node,
19455 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19456 ftype = build_function_type_list (unsigned_type_node,
19457 unsigned_type_node,
19458 unsigned_type_node,
19460 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19461 ftype = build_function_type_list (long_long_unsigned_type_node,
19462 long_long_unsigned_type_node,
19463 long_long_unsigned_type_node,
19465 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
19467 /* AMDFAM10 SSE4A New built-ins */
19468 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19469 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19470 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19471 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19472 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19473 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19475 /* Access to the vec_init patterns. */
19476 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19477 integer_type_node, NULL_TREE);
19478 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19480 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19481 short_integer_type_node,
19482 short_integer_type_node,
19483 short_integer_type_node, NULL_TREE);
19484 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19486 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19487 char_type_node, char_type_node,
19488 char_type_node, char_type_node,
19489 char_type_node, char_type_node,
19490 char_type_node, NULL_TREE);
19491 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19493 /* Access to the vec_extract patterns. */
19494 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19495 integer_type_node, NULL_TREE);
19496 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19498 ftype = build_function_type_list (long_long_integer_type_node,
19499 V2DI_type_node, integer_type_node,
19501 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19503 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19504 integer_type_node, NULL_TREE);
19505 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19507 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19508 integer_type_node, NULL_TREE);
19509 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19511 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19512 integer_type_node, NULL_TREE);
19513 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19515 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19516 integer_type_node, NULL_TREE);
19517 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19519 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19520 integer_type_node, NULL_TREE);
19521 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19523 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19524 integer_type_node, NULL_TREE);
19525 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
19527 /* Access to the vec_set patterns. */
19528 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19530 integer_type_node, NULL_TREE);
19531 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19533 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19535 integer_type_node, NULL_TREE);
19536 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19538 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19540 integer_type_node, NULL_TREE);
19541 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19543 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19545 integer_type_node, NULL_TREE);
19546 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19548 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19550 integer_type_node, NULL_TREE);
19551 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19553 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19555 integer_type_node, NULL_TREE);
19556 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
19558 /* Add SSE5 multi-arg argument instructions */
19559 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19561 tree mtype = NULL_TREE;
19566 switch ((enum multi_arg_type)d->flag)
19568 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19569 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19570 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19571 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19572 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19573 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19574 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19575 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19576 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19577 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19578 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19579 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19580 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19581 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19582 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19583 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19584 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19585 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19586 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19587 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19588 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19589 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19590 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19591 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19592 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19593 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19594 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19595 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19596 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19597 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19598 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19599 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19600 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19601 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19602 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19603 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19604 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19605 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19606 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19607 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19608 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19609 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19610 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19611 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19612 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19613 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19614 case MULTI_ARG_UNKNOWN:
19616 gcc_unreachable ();
19620 def_builtin_const (d->mask, d->name, mtype, d->code);
/* Target hook: set up all ix86 builtin functions.  Currently this
   just delegates to the MMX/SSE builtin initializer.  */
19625 ix86_init_builtins (void)
19628 ix86_init_mmx_sse_builtins ();
19631 /* Errors in the source file can cause expand_expr to return const0_rtx
19632 where we expect a vector. To avoid crashing, use one of the vector
19633 clear instructions. */
/* X: operand rtx to sanitize; MODE: the vector mode it should have.
   Returns X, with a scalar const0_rtx replaced by the all-zero vector
   constant of MODE.  */
19635 safe_vector_operand (rtx x, enum machine_mode mode)
19637 if (x == const0_rtx)
19638 x = CONST0_RTX (mode);
19642 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19643 4 operands. The third argument must be a constant smaller than 8
/* ICODE: the instruction pattern to emit; EXP: the CALL_EXPR being
   expanded (three arguments); TARGET: preferred result rtx or NULL.  */
19647 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
/* Expand the three call arguments and fetch the operand modes the
   pattern expects.  */
19651 tree arg0 = CALL_EXPR_ARG (exp, 0);
19652 tree arg1 = CALL_EXPR_ARG (exp, 1);
19653 tree arg2 = CALL_EXPR_ARG (exp, 2);
19654 rtx op0 = expand_normal (arg0);
19655 rtx op1 = expand_normal (arg1);
19656 rtx op2 = expand_normal (arg2);
19657 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19658 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19659 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19660 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
/* Guard against error-recovery const0_rtx operands in vector modes.  */
19662 if (VECTOR_MODE_P (mode1))
19663 op0 = safe_vector_operand (op0, mode1);
19664 if (VECTOR_MODE_P (mode2))
19665 op1 = safe_vector_operand (op1, mode2);
19666 if (VECTOR_MODE_P (mode3))
19667 op2 = safe_vector_operand (op2, mode3);
/* Make sure the result target is a fresh register of the right mode.  */
19671 || GET_MODE (target) != tmode
19672 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19673 target = gen_reg_rtx (tmode);
/* Force operands into registers when the pattern's predicates reject
   them as-is.  */
19675 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19676 op0 = copy_to_mode_reg (mode1, op0);
19677 if ((optimize && !register_operand (op1, mode2))
19678 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19679 op1 = copy_to_mode_reg (mode2, op1);
/* The third operand is normally an immediate; for the blend variants a
   register is acceptable, otherwise diagnose a non-constant argument.  */
19681 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19684 case CODE_FOR_sse4_1_blendvpd:
19685 case CODE_FOR_sse4_1_blendvps:
19686 case CODE_FOR_sse4_1_pblendvb:
19687 op2 = copy_to_mode_reg (mode3, op2);
19690 case CODE_FOR_sse4_1_roundsd:
19691 case CODE_FOR_sse4_1_roundss:
19692 error ("the third argument must be a 4-bit immediate")
19696 error ("the third argument must be an 8-bit immediate");
19700 pat = GEN_FCN (icode) (target, op0, op1, op2);
19707 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
/* ICODE: crc32 pattern; EXP: CALL_EXPR with two arguments; TARGET:
   preferred result rtx or NULL.  Returns the rtx holding the result.  */
19710 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19713 tree arg0 = CALL_EXPR_ARG (exp, 0);
19714 tree arg1 = CALL_EXPR_ARG (exp, 1);
19715 rtx op0 = expand_normal (arg0);
19716 rtx op1 = expand_normal (arg1);
19717 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19718 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19719 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Result must be a register of the pattern's mode.  */
19723 || GET_MODE (target) != tmode
19724 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19725 target = gen_reg_rtx (tmode);
19727 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19728 op0 = copy_to_mode_reg (mode0, op0);
19729 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
/* Narrow the second operand to the pattern's mode via a subreg of a
   fresh register copy.  */
19731 op1 = copy_to_reg (op1);
19732 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19735 pat = GEN_FCN (icode) (target, op0, op1);
19742 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE: two-operand pattern; EXP: CALL_EXPR with two arguments;
   TARGET: preferred result rtx or NULL.  */
19745 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19748 tree arg0 = CALL_EXPR_ARG (exp, 0);
19749 tree arg1 = CALL_EXPR_ARG (exp, 1);
19750 rtx op0 = expand_normal (arg0);
19751 rtx op1 = expand_normal (arg1);
19752 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19753 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19754 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-recovery const0_rtx operands in vector modes.  */
19756 if (VECTOR_MODE_P (mode0))
19757 op0 = safe_vector_operand (op0, mode0);
19758 if (VECTOR_MODE_P (mode1))
19759 op1 = safe_vector_operand (op1, mode1);
19761 if (optimize || !target
19762 || GET_MODE (target) != tmode
19763 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19764 target = gen_reg_rtx (tmode);
/* Widen an SImode operand to TImode by loading it into the low part of
   a V4SI register (e.g. for shift counts).  */
19766 if (GET_MODE (op1) == SImode && mode1 == TImode)
19768 rtx x = gen_reg_rtx (V4SImode);
19769 emit_insn (gen_sse2_loadd (x, op1));
19770 op1 = gen_lowpart (TImode, x);
19773 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19774 op0 = copy_to_mode_reg (mode0, op0);
19775 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19776 op1 = copy_to_mode_reg (mode1, op1);
19778 /* ??? Using ix86_fixup_binary_operands is problematic when
19779 we've got mismatched modes. Fake it. */
19785 if (tmode == mode0 && tmode == mode1)
19787 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
/* Mismatched modes: just force everything into registers.  */
19791 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
19793 op0 = force_reg (mode0, op0);
19794 op1 = force_reg (mode1, op1);
19795 target = gen_reg_rtx (tmode);
19798 pat = GEN_FCN (icode) (target, op0, op1);
19805 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* ICODE: pattern to emit; EXP: the CALL_EXPR; TARGET: preferred result
   rtx or NULL; M_TYPE: classifies argument count and operand kinds;
   SUB_CODE: comparison/sub-operation code for CMP/TF variants.  */
19808 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
19809 enum multi_arg_type m_type,
19810 enum insn_code sub_code)
19815 bool comparison_p = false;
19817 bool last_arg_constant = false;
19818 int num_memory = 0;
19821 enum machine_mode mode;
19824 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: number of arguments, whether the last argument must
   be an immediate, and whether a comparison rtx must be inserted.  */
19828 case MULTI_ARG_3_SF:
19829 case MULTI_ARG_3_DF:
19830 case MULTI_ARG_3_DI:
19831 case MULTI_ARG_3_SI:
19832 case MULTI_ARG_3_SI_DI:
19833 case MULTI_ARG_3_HI:
19834 case MULTI_ARG_3_HI_SI:
19835 case MULTI_ARG_3_QI:
19836 case MULTI_ARG_3_PERMPS:
19837 case MULTI_ARG_3_PERMPD:
19841 case MULTI_ARG_2_SF:
19842 case MULTI_ARG_2_DF:
19843 case MULTI_ARG_2_DI:
19844 case MULTI_ARG_2_SI:
19845 case MULTI_ARG_2_HI:
19846 case MULTI_ARG_2_QI:
/* Two arguments where the second must be a constant integer.  */
19850 case MULTI_ARG_2_DI_IMM:
19851 case MULTI_ARG_2_SI_IMM:
19852 case MULTI_ARG_2_HI_IMM:
19853 case MULTI_ARG_2_QI_IMM:
19855 last_arg_constant = true;
19858 case MULTI_ARG_1_SF:
19859 case MULTI_ARG_1_DF:
19860 case MULTI_ARG_1_DI:
19861 case MULTI_ARG_1_SI:
19862 case MULTI_ARG_1_HI:
19863 case MULTI_ARG_1_QI:
19864 case MULTI_ARG_1_SI_DI:
19865 case MULTI_ARG_1_HI_DI:
19866 case MULTI_ARG_1_HI_SI:
19867 case MULTI_ARG_1_QI_DI:
19868 case MULTI_ARG_1_QI_SI:
19869 case MULTI_ARG_1_QI_HI:
19870 case MULTI_ARG_1_PH2PS:
19871 case MULTI_ARG_1_PS2PH:
/* Two-argument comparisons: a comparison rtx (SUB_CODE) is passed to
   the pattern ahead of the operands.  */
19875 case MULTI_ARG_2_SF_CMP:
19876 case MULTI_ARG_2_DF_CMP:
19877 case MULTI_ARG_2_DI_CMP:
19878 case MULTI_ARG_2_SI_CMP:
19879 case MULTI_ARG_2_HI_CMP:
19880 case MULTI_ARG_2_QI_CMP:
19882 comparison_p = true;
19885 case MULTI_ARG_2_SF_TF:
19886 case MULTI_ARG_2_DF_TF:
19887 case MULTI_ARG_2_DI_TF:
19888 case MULTI_ARG_2_SI_TF:
19889 case MULTI_ARG_2_HI_TF:
19890 case MULTI_ARG_2_QI_TF:
19895 case MULTI_ARG_UNKNOWN:
19897 gcc_unreachable ();
19900 if (optimize || !target
19901 || GET_MODE (target) != tmode
19902 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19903 target = gen_reg_rtx (tmode);
19905 gcc_assert (nargs <= 4);
/* Expand each call argument and legitimize it for the pattern; for a
   comparison the operand indices are shifted by one to make room for
   the comparison rtx.  */
19907 for (i = 0; i < nargs; i++)
19909 tree arg = CALL_EXPR_ARG (exp, i);
19910 rtx op = expand_normal (arg);
19911 int adjust = (comparison_p) ? 1 : 0;
19912 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
19914 if (last_arg_constant && i == nargs-1)
19916 if (GET_CODE (op) != CONST_INT)
19918 error ("last argument must be an immediate");
19919 return gen_reg_rtx (tmode);
19924 if (VECTOR_MODE_P (mode))
19925 op = safe_vector_operand (op, mode);
19927 /* If we aren't optimizing, only allow one memory operand to be
19929 if (memory_operand (op, mode))
19932 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
19935 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
19937 op = force_reg (mode, op);
19941 args[i].mode = mode;
/* Emit the pattern with the operand layout implied by the argument
   count and comparison classification.  */
19947 pat = GEN_FCN (icode) (target, args[0].op);
19952 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19953 GEN_INT ((int)sub_code));
19954 else if (! comparison_p)
19955 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19958 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
19962 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
19967 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
19971 gcc_unreachable ();
19981 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* ICODE: store pattern; EXP: CALL_EXPR whose first argument is the
   destination address and second the value to store.  */
19984 ix86_expand_store_builtin (enum insn_code icode, tree exp)
19987 tree arg0 = CALL_EXPR_ARG (exp, 0);
19988 tree arg1 = CALL_EXPR_ARG (exp, 1);
19989 rtx op0 = expand_normal (arg0);
19990 rtx op1 = expand_normal (arg1);
19991 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
19992 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19994 if (VECTOR_MODE_P (mode1))
19995 op1 = safe_vector_operand (op1, mode1);
/* Turn the pointer argument into a MEM; force the value into a reg.  */
19997 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19998 op1 = copy_to_mode_reg (mode1, op1);
20000 pat = GEN_FCN (icode) (op0, op1);
20006 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* ICODE: one-operand pattern; EXP: the CALL_EXPR; TARGET: preferred
   result rtx or NULL; DO_LOAD: nonzero when the argument is a pointer
   to be dereferenced rather than a value.  */
20009 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
20010 rtx target, int do_load)
20013 tree arg0 = CALL_EXPR_ARG (exp, 0);
20014 rtx op0 = expand_normal (arg0);
20015 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20016 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20018 if (optimize || !target
20019 || GET_MODE (target) != tmode
20020 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20021 target = gen_reg_rtx (tmode);
/* For loads, wrap the address in a MEM of the operand mode.  */
20023 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20026 if (VECTOR_MODE_P (mode0))
20027 op0 = safe_vector_operand (op0, mode0);
20029 if ((optimize && !register_operand (op0, mode0))
20030 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20031 op0 = copy_to_mode_reg (mode0, op0);
/* The SSE4.1 round patterns take an extra immediate rounding-mode
   operand; validate it as a 4-bit immediate.  */
20036 case CODE_FOR_sse4_1_roundpd:
20037 case CODE_FOR_sse4_1_roundps:
20039 tree arg1 = CALL_EXPR_ARG (exp, 1);
20040 rtx op1 = expand_normal (arg1);
20041 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20043 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20045 error ("the second argument must be a 4-bit immediate");
20048 pat = GEN_FCN (icode) (target, op0, op1);
20052 pat = GEN_FCN (icode) (target, op0);
20062 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20063 sqrtss, rsqrtss, rcpss. */
/* These patterns take the same source twice (operand 1 and 2), so the
   single call argument is duplicated.  */
20066 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
20069 tree arg0 = CALL_EXPR_ARG (exp, 0);
20070 rtx op1, op0 = expand_normal (arg0);
20071 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20072 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20074 if (optimize || !target
20075 || GET_MODE (target) != tmode
20076 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20077 target = gen_reg_rtx (tmode);
20079 if (VECTOR_MODE_P (mode0))
20080 op0 = safe_vector_operand (op0, mode0);
20082 if ((optimize && !register_operand (op0, mode0))
20083 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20084 op0 = copy_to_mode_reg (mode0, op0);
/* Second operand is a copy of the first; legitimize it separately.  */
20087 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20088 op1 = copy_to_mode_reg (mode0, op1);
20090 pat = GEN_FCN (icode) (target, op0, op1);
20097 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D: the builtin description (icode, comparison code, flags); EXP: the
   CALL_EXPR; the result is a vector comparison built from D's rtx
   comparison code.  */
20100 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
20104 tree arg0 = CALL_EXPR_ARG (exp, 0);
20105 tree arg1 = CALL_EXPR_ARG (exp, 1);
20106 rtx op0 = expand_normal (arg0);
20107 rtx op1 = expand_normal (arg1);
20109 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20110 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20111 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20112 enum rtx_code comparison = d->comparison;
20114 if (VECTOR_MODE_P (mode0))
20115 op0 = safe_vector_operand (op0, mode0);
20116 if (VECTOR_MODE_P (mode1))
20117 op1 = safe_vector_operand (op1, mode1);
20119 /* Swap operands if we have a comparison that isn't available in
/* Copy op1 through a fresh register before swapping so the original
   operand rtx is not clobbered.  */
20121 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20123 rtx tmp = gen_reg_rtx (mode1);
20124 emit_move_insn (tmp, op1);
20129 if (optimize || !target
20130 || GET_MODE (target) != tmode
20131 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20132 target = gen_reg_rtx (tmode);
20134 if ((optimize && !register_operand (op0, mode0))
20135 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20136 op0 = copy_to_mode_reg (mode0, op0);
20137 if ((optimize && !register_operand (op1, mode1))
20138 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20139 op1 = copy_to_mode_reg (mode1, op1);
/* Build the comparison rtx and hand it to the pattern as operand 3.  */
20141 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20142 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20149 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Emits the comi pattern, then materializes the flags result into a
   QImode low part of an SImode pseudo via a setcc-style SET.  */
20152 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20156 tree arg0 = CALL_EXPR_ARG (exp, 0);
20157 tree arg1 = CALL_EXPR_ARG (exp, 1);
20158 rtx op0 = expand_normal (arg0);
20159 rtx op1 = expand_normal (arg1);
20160 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20161 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20162 enum rtx_code comparison = d->comparison;
20164 if (VECTOR_MODE_P (mode0))
20165 op0 = safe_vector_operand (op0, mode0);
20166 if (VECTOR_MODE_P (mode1))
20167 op1 = safe_vector_operand (op1, mode1);
20169 /* Swap operands if we have a comparison that isn't available in
20171 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the SImode result first so only the low byte is written.  */
20178 target = gen_reg_rtx (SImode);
20179 emit_move_insn (target, const0_rtx);
20180 target = gen_rtx_SUBREG (QImode, target, 0);
20182 if ((optimize && !register_operand (op0, mode0))
20183 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20184 op0 = copy_to_mode_reg (mode0, op0);
20185 if ((optimize && !register_operand (op1, mode1))
20186 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20187 op1 = copy_to_mode_reg (mode1, op1);
20189 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte from the flags comparison; return the full SImode
   pseudo underneath the QImode subreg.  */
20193 emit_insn (gen_rtx_SET (VOIDmode,
20194 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20195 gen_rtx_fmt_ee (comparison, QImode,
20199 return SUBREG_REG (target);
20202 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Like ix86_expand_sse_comi: emit the ptest pattern, then convert the
   flags result into a 0/1 integer value.  */
20205 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20209 tree arg0 = CALL_EXPR_ARG (exp, 0);
20210 tree arg1 = CALL_EXPR_ARG (exp, 1);
20211 rtx op0 = expand_normal (arg0);
20212 rtx op1 = expand_normal (arg1);
20213 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20214 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20215 enum rtx_code comparison = d->comparison;
20217 if (VECTOR_MODE_P (mode0))
20218 op0 = safe_vector_operand (op0, mode0);
20219 if (VECTOR_MODE_P (mode1))
20220 op1 = safe_vector_operand (op1, mode1);
/* Zero the SImode result first so only the low byte is written.  */
20222 target = gen_reg_rtx (SImode);
20223 emit_move_insn (target, const0_rtx);
20224 target = gen_rtx_SUBREG (QImode, target, 0);
20226 if ((optimize && !register_operand (op0, mode0))
20227 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20228 op0 = copy_to_mode_reg (mode0, op0);
20229 if ((optimize && !register_operand (op1, mode1))
20230 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20231 op1 = copy_to_mode_reg (mode1, op1);
20233 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte from the flags comparison; return the SImode pseudo.  */
20237 emit_insn (gen_rtx_SET (VOIDmode,
20238 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20239 gen_rtx_fmt_ee (comparison, QImode,
20243 return SUBREG_REG (target);
20246 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* D: builtin description; EXP: CALL_EXPR with five arguments (two
   vectors, two explicit lengths, and an 8-bit mode immediate); TARGET:
   preferred result rtx or NULL.  Depending on D->code the result is the
   index (PCMPESTRI), the mask (PCMPESTRM), or a flag bit read from the
   flags register (D->flag).  */
20249 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20250 tree exp, rtx target)
20253 tree arg0 = CALL_EXPR_ARG (exp, 0);
20254 tree arg1 = CALL_EXPR_ARG (exp, 1);
20255 tree arg2 = CALL_EXPR_ARG (exp, 2);
20256 tree arg3 = CALL_EXPR_ARG (exp, 3);
20257 tree arg4 = CALL_EXPR_ARG (exp, 4);
20258 rtx scratch0, scratch1;
20259 rtx op0 = expand_normal (arg0);
20260 rtx op1 = expand_normal (arg1);
20261 rtx op2 = expand_normal (arg2);
20262 rtx op3 = expand_normal (arg3);
20263 rtx op4 = expand_normal (arg4);
20264 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20266 tmode0 = insn_data[d->icode].operand[0].mode;
20267 tmode1 = insn_data[d->icode].operand[1].mode;
20268 modev2 = insn_data[d->icode].operand[2].mode;
20269 modei3 = insn_data[d->icode].operand[3].mode;
20270 modev4 = insn_data[d->icode].operand[4].mode;
20271 modei5 = insn_data[d->icode].operand[5].mode;
20272 modeimm = insn_data[d->icode].operand[6].mode;
20274 if (VECTOR_MODE_P (modev2))
20275 op0 = safe_vector_operand (op0, modev2);
20276 if (VECTOR_MODE_P (modev4))
20277 op2 = safe_vector_operand (op2, modev4);
/* Legitimize the two vector operands and the two length operands.  */
20279 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20280 op0 = copy_to_mode_reg (modev2, op0);
20281 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20282 op1 = copy_to_mode_reg (modei3, op1);
20283 if ((optimize && !register_operand (op2, modev4))
20284 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20285 op2 = copy_to_mode_reg (modev4, op2);
20286 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20287 op3 = copy_to_mode_reg (modei5, op3);
20289 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20291 error ("the fifth argument must be an 8-bit immediate");
/* Both index and mask outputs exist in the pattern; the one the
   builtin does not return goes into a scratch register.  */
20295 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20297 if (optimize || !target
20298 || GET_MODE (target) != tmode0
20299 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20300 target = gen_reg_rtx (tmode0);
20302 scratch1 = gen_reg_rtx (tmode1);
20304 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20306 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20308 if (optimize || !target
20309 || GET_MODE (target) != tmode1
20310 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20311 target = gen_reg_rtx (tmode1);
20313 scratch0 = gen_reg_rtx (tmode0);
20315 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-returning variant: both outputs are scratch; the result is a
   bit read from the flags register named by D->flag.  */
20319 gcc_assert (d->flag);
20321 scratch0 = gen_reg_rtx (tmode0);
20322 scratch1 = gen_reg_rtx (tmode1);
20324 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Materialize the flag bit as a 0/1 SImode value.  */
20334 target = gen_reg_rtx (SImode);
20335 emit_move_insn (target, const0_rtx);
20336 target = gen_rtx_SUBREG (QImode, target, 0);
20339 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20340 gen_rtx_fmt_ee (EQ, QImode,
20341 gen_rtx_REG ((enum machine_mode) d->flag,
20344 return SUBREG_REG (target);
20351 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* D: builtin description; EXP: CALL_EXPR with three arguments (two
   vectors and an 8-bit mode immediate); TARGET: preferred result rtx
   or NULL.  Like the pcmpestr expander but for the implicit-length
   string compares, which take no length operands.  */
20354 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20355 tree exp, rtx target)
20358 tree arg0 = CALL_EXPR_ARG (exp, 0);
20359 tree arg1 = CALL_EXPR_ARG (exp, 1);
20360 tree arg2 = CALL_EXPR_ARG (exp, 2);
20361 rtx scratch0, scratch1;
20362 rtx op0 = expand_normal (arg0);
20363 rtx op1 = expand_normal (arg1);
20364 rtx op2 = expand_normal (arg2);
20365 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20367 tmode0 = insn_data[d->icode].operand[0].mode;
20368 tmode1 = insn_data[d->icode].operand[1].mode;
20369 modev2 = insn_data[d->icode].operand[2].mode;
20370 modev3 = insn_data[d->icode].operand[3].mode;
20371 modeimm = insn_data[d->icode].operand[4].mode;
20373 if (VECTOR_MODE_P (modev2))
20374 op0 = safe_vector_operand (op0, modev2);
20375 if (VECTOR_MODE_P (modev3))
20376 op1 = safe_vector_operand (op1, modev3);
/* Legitimize the two vector operands.  */
20378 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20379 op0 = copy_to_mode_reg (modev2, op0);
20380 if ((optimize && !register_operand (op1, modev3))
20381 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20382 op1 = copy_to_mode_reg (modev3, op1);
20384 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20386 error ("the third argument must be an 8-bit immediate");
/* Both index and mask outputs exist in the pattern; the one the
   builtin does not return goes into a scratch register.  */
20390 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20392 if (optimize || !target
20393 || GET_MODE (target) != tmode0
20394 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20395 target = gen_reg_rtx (tmode0);
20397 scratch1 = gen_reg_rtx (tmode1);
20399 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20401 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20403 if (optimize || !target
20404 || GET_MODE (target) != tmode1
20405 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20406 target = gen_reg_rtx (tmode1);
20408 scratch0 = gen_reg_rtx (tmode0);
20410 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-returning variant: both outputs are scratch; the result is a
   bit read from the flags register named by D->flag.  */
20414 gcc_assert (d->flag);
20416 scratch0 = gen_reg_rtx (tmode0);
20417 scratch1 = gen_reg_rtx (tmode1);
20419 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Materialize the flag bit as a 0/1 SImode value.  */
20429 target = gen_reg_rtx (SImode);
20430 emit_move_insn (target, const0_rtx);
20431 target = gen_rtx_SUBREG (QImode, target, 0);
20434 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20435 gen_rtx_fmt_ee (EQ, QImode,
20436 gen_rtx_REG ((enum machine_mode) d->flag,
20439 return SUBREG_REG (target);
20445 /* Return the integer constant in ARG. Constrain it to be in the range
20446 of the subparts of VEC_TYPE; issue an error if not. */
/* VEC_TYPE: the vector type being indexed; ARG: the selector tree.
   The selector must be a host-representable unsigned integer constant
   no larger than the last element index.  */
20449 get_element_number (tree vec_type, tree arg)
20451 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20453 if (!host_integerp (arg, 1)
20454 || (elt = tree_low_cst (arg, 1), elt > max))
20456 error ("selector must be an integer constant in the range 0..%wi", max);
20463 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20464 ix86_expand_vector_init. We DO have language-level syntax for this, in
20465 the form of (type){ init-list }. Except that since we can't place emms
20466 instructions from inside the compiler, we can't allow the use of MMX
20467 registers unless the user explicitly asks for it. So we do *not* define
20468 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20469 we have builtins invoked by mmintrin.h that gives us license to emit
20470 these sorts of instructions. */
/* TYPE: the vector type to build; EXP: CALL_EXPR whose arguments are
   the element initializers, one per vector element; TARGET: preferred
   result rtx or NULL.  */
20473 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20475 enum machine_mode tmode = TYPE_MODE (type);
20476 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20477 int i, n_elt = GET_MODE_NUNITS (tmode);
20478 rtvec v = rtvec_alloc (n_elt);
20480 gcc_assert (VECTOR_MODE_P (tmode));
20481 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and collect them into a PARALLEL.  */
20483 for (i = 0; i < n_elt; ++i)
20485 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20486 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20489 if (!target || !register_operand (target, tmode))
20490 target = gen_reg_rtx (tmode);
20492 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20496 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20497 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20498 had a language-level syntax for referencing vector elements. */
/* EXP: CALL_EXPR whose first argument is the vector and second the
   constant element selector; TARGET: preferred result rtx or NULL.  */
20501 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20503 enum machine_mode tmode, mode0;
20508 arg0 = CALL_EXPR_ARG (exp, 0);
20509 arg1 = CALL_EXPR_ARG (exp, 1);
20511 op0 = expand_normal (arg0);
/* Validate the selector against the vector type's element count.  */
20512 elt = get_element_number (TREE_TYPE (arg0), arg1);
20514 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20515 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20516 gcc_assert (VECTOR_MODE_P (mode0));
20518 op0 = force_reg (mode0, op0);
20520 if (optimize || !target || !register_operand (target, tmode))
20521 target = gen_reg_rtx (tmode);
20523 ix86_expand_vector_extract (true, target, op0, elt);
20528 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20529 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20530 a language-level syntax for referencing vector elements. */
/* EXP: CALL_EXPR whose arguments are the source vector, the new element
   value, and the constant element selector.  */
20533 ix86_expand_vec_set_builtin (tree exp)
20535 enum machine_mode tmode, mode1;
20536 tree arg0, arg1, arg2;
20538 rtx op0, op1, target;
20540 arg0 = CALL_EXPR_ARG (exp, 0);
20541 arg1 = CALL_EXPR_ARG (exp, 1);
20542 arg2 = CALL_EXPR_ARG (exp, 2);
20544 tmode = TYPE_MODE (TREE_TYPE (arg0));
20545 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20546 gcc_assert (VECTOR_MODE_P (tmode));
20548 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20549 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Validate the selector against the vector type's element count.  */
20550 elt = get_element_number (TREE_TYPE (arg0), arg2);
20552 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20553 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20555 op0 = force_reg (tmode, op0);
20556 op1 = force_reg (mode1, op1);
20558 /* OP0 is the source of these builtin functions and shouldn't be
20559 modified. Create a copy, use it and return it as target. */
20560 target = gen_reg_rtx (tmode);
20561 emit_move_insn (target, op0);
20562 ix86_expand_vector_set (true, target, op1, elt);
20567 /* Expand an expression EXP that calls a built-in function,
20568 with result going to TARGET if that's convenient
20569 (and in mode MODE if that's convenient).
20570 SUBTARGET may be used as the target for computing one of EXP's operands.
20571 IGNORE is nonzero if the value is to be ignored. */
20574 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20575 enum machine_mode mode ATTRIBUTE_UNUSED,
20576 int ignore ATTRIBUTE_UNUSED)
20578 const struct builtin_description *d;
20580 enum insn_code icode;
20581 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20582 tree arg0, arg1, arg2, arg3;
20583 rtx op0, op1, op2, op3, pat;
20584 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20585 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20589 case IX86_BUILTIN_EMMS:
20590 emit_insn (gen_mmx_emms ());
20593 case IX86_BUILTIN_SFENCE:
20594 emit_insn (gen_sse_sfence ());
20597 case IX86_BUILTIN_MASKMOVQ:
20598 case IX86_BUILTIN_MASKMOVDQU:
20599 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20600 ? CODE_FOR_mmx_maskmovq
20601 : CODE_FOR_sse2_maskmovdqu);
20602 /* Note the arg order is different from the operand order. */
20603 arg1 = CALL_EXPR_ARG (exp, 0);
20604 arg2 = CALL_EXPR_ARG (exp, 1);
20605 arg0 = CALL_EXPR_ARG (exp, 2);
20606 op0 = expand_normal (arg0);
20607 op1 = expand_normal (arg1);
20608 op2 = expand_normal (arg2);
20609 mode0 = insn_data[icode].operand[0].mode;
20610 mode1 = insn_data[icode].operand[1].mode;
20611 mode2 = insn_data[icode].operand[2].mode;
20613 op0 = force_reg (Pmode, op0);
20614 op0 = gen_rtx_MEM (mode1, op0);
20616 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20617 op0 = copy_to_mode_reg (mode0, op0);
20618 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20619 op1 = copy_to_mode_reg (mode1, op1);
20620 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20621 op2 = copy_to_mode_reg (mode2, op2);
20622 pat = GEN_FCN (icode) (op0, op1, op2);
20628 case IX86_BUILTIN_RSQRTF:
20629 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20631 case IX86_BUILTIN_SQRTSS:
20632 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20633 case IX86_BUILTIN_RSQRTSS:
20634 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20635 case IX86_BUILTIN_RCPSS:
20636 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20638 case IX86_BUILTIN_LOADUPS:
20639 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20641 case IX86_BUILTIN_STOREUPS:
20642 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20644 case IX86_BUILTIN_LOADHPS:
20645 case IX86_BUILTIN_LOADLPS:
20646 case IX86_BUILTIN_LOADHPD:
20647 case IX86_BUILTIN_LOADLPD:
20648 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20649 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20650 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20651 : CODE_FOR_sse2_loadlpd);
20652 arg0 = CALL_EXPR_ARG (exp, 0);
20653 arg1 = CALL_EXPR_ARG (exp, 1);
20654 op0 = expand_normal (arg0);
20655 op1 = expand_normal (arg1);
20656 tmode = insn_data[icode].operand[0].mode;
20657 mode0 = insn_data[icode].operand[1].mode;
20658 mode1 = insn_data[icode].operand[2].mode;
20660 op0 = force_reg (mode0, op0);
20661 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20662 if (optimize || target == 0
20663 || GET_MODE (target) != tmode
20664 || !register_operand (target, tmode))
20665 target = gen_reg_rtx (tmode);
20666 pat = GEN_FCN (icode) (target, op0, op1);
20672 case IX86_BUILTIN_STOREHPS:
20673 case IX86_BUILTIN_STORELPS:
20674 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20675 : CODE_FOR_sse_storelps);
20676 arg0 = CALL_EXPR_ARG (exp, 0);
20677 arg1 = CALL_EXPR_ARG (exp, 1);
20678 op0 = expand_normal (arg0);
20679 op1 = expand_normal (arg1);
20680 mode0 = insn_data[icode].operand[0].mode;
20681 mode1 = insn_data[icode].operand[1].mode;
20683 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20684 op1 = force_reg (mode1, op1);
20686 pat = GEN_FCN (icode) (op0, op1);
20692 case IX86_BUILTIN_MOVNTPS:
20693 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20694 case IX86_BUILTIN_MOVNTQ:
20695 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20697 case IX86_BUILTIN_LDMXCSR:
20698 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20699 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20700 emit_move_insn (target, op0);
20701 emit_insn (gen_sse_ldmxcsr (target));
20704 case IX86_BUILTIN_STMXCSR:
20705 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20706 emit_insn (gen_sse_stmxcsr (target));
20707 return copy_to_mode_reg (SImode, target);
20709 case IX86_BUILTIN_SHUFPS:
20710 case IX86_BUILTIN_SHUFPD:
20711 icode = (fcode == IX86_BUILTIN_SHUFPS
20712 ? CODE_FOR_sse_shufps
20713 : CODE_FOR_sse2_shufpd);
20714 arg0 = CALL_EXPR_ARG (exp, 0);
20715 arg1 = CALL_EXPR_ARG (exp, 1);
20716 arg2 = CALL_EXPR_ARG (exp, 2);
20717 op0 = expand_normal (arg0);
20718 op1 = expand_normal (arg1);
20719 op2 = expand_normal (arg2);
20720 tmode = insn_data[icode].operand[0].mode;
20721 mode0 = insn_data[icode].operand[1].mode;
20722 mode1 = insn_data[icode].operand[2].mode;
20723 mode2 = insn_data[icode].operand[3].mode;
20725 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20726 op0 = copy_to_mode_reg (mode0, op0);
20727 if ((optimize && !register_operand (op1, mode1))
20728 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
20729 op1 = copy_to_mode_reg (mode1, op1);
20730 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20732 /* @@@ better error message */
20733 error ("mask must be an immediate");
20734 return gen_reg_rtx (tmode);
20736 if (optimize || target == 0
20737 || GET_MODE (target) != tmode
20738 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20739 target = gen_reg_rtx (tmode);
20740 pat = GEN_FCN (icode) (target, op0, op1, op2);
20746 case IX86_BUILTIN_PSHUFW:
20747 case IX86_BUILTIN_PSHUFD:
20748 case IX86_BUILTIN_PSHUFHW:
20749 case IX86_BUILTIN_PSHUFLW:
20750 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20751 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20752 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20753 : CODE_FOR_mmx_pshufw);
20754 arg0 = CALL_EXPR_ARG (exp, 0);
20755 arg1 = CALL_EXPR_ARG (exp, 1);
20756 op0 = expand_normal (arg0);
20757 op1 = expand_normal (arg1);
20758 tmode = insn_data[icode].operand[0].mode;
20759 mode1 = insn_data[icode].operand[1].mode;
20760 mode2 = insn_data[icode].operand[2].mode;
20762 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20763 op0 = copy_to_mode_reg (mode1, op0);
20764 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20766 /* @@@ better error message */
20767 error ("mask must be an immediate");
20771 || GET_MODE (target) != tmode
20772 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20773 target = gen_reg_rtx (tmode);
20774 pat = GEN_FCN (icode) (target, op0, op1);
20780 case IX86_BUILTIN_PSLLW128:
20781 case IX86_BUILTIN_PSLLWI128:
20782 icode = CODE_FOR_ashlv8hi3;
20784 case IX86_BUILTIN_PSLLD128:
20785 case IX86_BUILTIN_PSLLDI128:
20786 icode = CODE_FOR_ashlv4si3;
20788 case IX86_BUILTIN_PSLLQ128:
20789 case IX86_BUILTIN_PSLLQI128:
20790 icode = CODE_FOR_ashlv2di3;
20792 case IX86_BUILTIN_PSRAW128:
20793 case IX86_BUILTIN_PSRAWI128:
20794 icode = CODE_FOR_ashrv8hi3;
20796 case IX86_BUILTIN_PSRAD128:
20797 case IX86_BUILTIN_PSRADI128:
20798 icode = CODE_FOR_ashrv4si3;
20800 case IX86_BUILTIN_PSRLW128:
20801 case IX86_BUILTIN_PSRLWI128:
20802 icode = CODE_FOR_lshrv8hi3;
20804 case IX86_BUILTIN_PSRLD128:
20805 case IX86_BUILTIN_PSRLDI128:
20806 icode = CODE_FOR_lshrv4si3;
20808 case IX86_BUILTIN_PSRLQ128:
20809 case IX86_BUILTIN_PSRLQI128:
20810 icode = CODE_FOR_lshrv2di3;
20813 arg0 = CALL_EXPR_ARG (exp, 0);
20814 arg1 = CALL_EXPR_ARG (exp, 1);
20815 op0 = expand_normal (arg0);
20816 op1 = expand_normal (arg1);
20818 tmode = insn_data[icode].operand[0].mode;
20819 mode1 = insn_data[icode].operand[1].mode;
20821 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20822 op0 = copy_to_reg (op0);
20824 if (!CONST_INT_P (op1))
20825 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
20827 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
20828 op1 = copy_to_reg (op1);
20830 target = gen_reg_rtx (tmode);
20831 pat = GEN_FCN (icode) (target, op0, op1);
20837 case IX86_BUILTIN_PSLLDQI128:
20838 case IX86_BUILTIN_PSRLDQI128:
20839 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
20840 : CODE_FOR_sse2_lshrti3);
20841 arg0 = CALL_EXPR_ARG (exp, 0);
20842 arg1 = CALL_EXPR_ARG (exp, 1);
20843 op0 = expand_normal (arg0);
20844 op1 = expand_normal (arg1);
20845 tmode = insn_data[icode].operand[0].mode;
20846 mode1 = insn_data[icode].operand[1].mode;
20847 mode2 = insn_data[icode].operand[2].mode;
20849 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20851 op0 = copy_to_reg (op0);
20852 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20854 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20856 error ("shift must be an immediate");
20859 target = gen_reg_rtx (V2DImode);
20860 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
20867 case IX86_BUILTIN_FEMMS:
20868 emit_insn (gen_mmx_femms ());
20871 case IX86_BUILTIN_PAVGUSB:
20872 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
20874 case IX86_BUILTIN_PF2ID:
20875 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
20877 case IX86_BUILTIN_PFACC:
20878 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
20880 case IX86_BUILTIN_PFADD:
20881 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
20883 case IX86_BUILTIN_PFCMPEQ:
20884 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
20886 case IX86_BUILTIN_PFCMPGE:
20887 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
20889 case IX86_BUILTIN_PFCMPGT:
20890 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
20892 case IX86_BUILTIN_PFMAX:
20893 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
20895 case IX86_BUILTIN_PFMIN:
20896 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
20898 case IX86_BUILTIN_PFMUL:
20899 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
20901 case IX86_BUILTIN_PFRCP:
20902 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
20904 case IX86_BUILTIN_PFRCPIT1:
20905 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
20907 case IX86_BUILTIN_PFRCPIT2:
20908 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
20910 case IX86_BUILTIN_PFRSQIT1:
20911 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
20913 case IX86_BUILTIN_PFRSQRT:
20914 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
20916 case IX86_BUILTIN_PFSUB:
20917 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
20919 case IX86_BUILTIN_PFSUBR:
20920 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
20922 case IX86_BUILTIN_PI2FD:
20923 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
20925 case IX86_BUILTIN_PMULHRW:
20926 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
20928 case IX86_BUILTIN_PF2IW:
20929 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
20931 case IX86_BUILTIN_PFNACC:
20932 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
20934 case IX86_BUILTIN_PFPNACC:
20935 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
20937 case IX86_BUILTIN_PI2FW:
20938 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
20940 case IX86_BUILTIN_PSWAPDSI:
20941 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
20943 case IX86_BUILTIN_PSWAPDSF:
20944 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
20946 case IX86_BUILTIN_SQRTSD:
20947 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
20948 case IX86_BUILTIN_LOADUPD:
20949 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
20950 case IX86_BUILTIN_STOREUPD:
20951 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
20953 case IX86_BUILTIN_MFENCE:
20954 emit_insn (gen_sse2_mfence ());
20956 case IX86_BUILTIN_LFENCE:
20957 emit_insn (gen_sse2_lfence ());
20960 case IX86_BUILTIN_CLFLUSH:
20961 arg0 = CALL_EXPR_ARG (exp, 0);
20962 op0 = expand_normal (arg0);
20963 icode = CODE_FOR_sse2_clflush;
20964 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
20965 op0 = copy_to_mode_reg (Pmode, op0);
20967 emit_insn (gen_sse2_clflush (op0));
20970 case IX86_BUILTIN_MOVNTPD:
20971 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
20972 case IX86_BUILTIN_MOVNTDQ:
20973 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
20974 case IX86_BUILTIN_MOVNTI:
20975 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
20977 case IX86_BUILTIN_LOADDQU:
20978 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
20979 case IX86_BUILTIN_STOREDQU:
20980 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
20982 case IX86_BUILTIN_MONITOR:
20983 arg0 = CALL_EXPR_ARG (exp, 0);
20984 arg1 = CALL_EXPR_ARG (exp, 1);
20985 arg2 = CALL_EXPR_ARG (exp, 2);
20986 op0 = expand_normal (arg0);
20987 op1 = expand_normal (arg1);
20988 op2 = expand_normal (arg2);
20990 op0 = copy_to_mode_reg (Pmode, op0);
20992 op1 = copy_to_mode_reg (SImode, op1);
20994 op2 = copy_to_mode_reg (SImode, op2);
20996 emit_insn (gen_sse3_monitor (op0, op1, op2));
20998 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21001 case IX86_BUILTIN_MWAIT:
21002 arg0 = CALL_EXPR_ARG (exp, 0);
21003 arg1 = CALL_EXPR_ARG (exp, 1);
21004 op0 = expand_normal (arg0);
21005 op1 = expand_normal (arg1);
21007 op0 = copy_to_mode_reg (SImode, op0);
21009 op1 = copy_to_mode_reg (SImode, op1);
21010 emit_insn (gen_sse3_mwait (op0, op1));
21013 case IX86_BUILTIN_LDDQU:
21014 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
21017 case IX86_BUILTIN_PALIGNR:
21018 case IX86_BUILTIN_PALIGNR128:
21019 if (fcode == IX86_BUILTIN_PALIGNR)
21021 icode = CODE_FOR_ssse3_palignrdi;
21026 icode = CODE_FOR_ssse3_palignrti;
21029 arg0 = CALL_EXPR_ARG (exp, 0);
21030 arg1 = CALL_EXPR_ARG (exp, 1);
21031 arg2 = CALL_EXPR_ARG (exp, 2);
21032 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21033 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21034 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21035 tmode = insn_data[icode].operand[0].mode;
21036 mode1 = insn_data[icode].operand[1].mode;
21037 mode2 = insn_data[icode].operand[2].mode;
21038 mode3 = insn_data[icode].operand[3].mode;
21040 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21042 op0 = copy_to_reg (op0);
21043 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21045 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21047 op1 = copy_to_reg (op1);
21048 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21050 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21052 error ("shift must be an immediate");
21055 target = gen_reg_rtx (mode);
21056 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21063 case IX86_BUILTIN_MOVNTDQA:
21064 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21067 case IX86_BUILTIN_MOVNTSD:
21068 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21070 case IX86_BUILTIN_MOVNTSS:
21071 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21073 case IX86_BUILTIN_INSERTQ:
21074 case IX86_BUILTIN_EXTRQ:
21075 icode = (fcode == IX86_BUILTIN_EXTRQ
21076 ? CODE_FOR_sse4a_extrq
21077 : CODE_FOR_sse4a_insertq);
21078 arg0 = CALL_EXPR_ARG (exp, 0);
21079 arg1 = CALL_EXPR_ARG (exp, 1);
21080 op0 = expand_normal (arg0);
21081 op1 = expand_normal (arg1);
21082 tmode = insn_data[icode].operand[0].mode;
21083 mode1 = insn_data[icode].operand[1].mode;
21084 mode2 = insn_data[icode].operand[2].mode;
21085 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21086 op0 = copy_to_mode_reg (mode1, op0);
21087 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21088 op1 = copy_to_mode_reg (mode2, op1);
21089 if (optimize || target == 0
21090 || GET_MODE (target) != tmode
21091 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21092 target = gen_reg_rtx (tmode);
21093 pat = GEN_FCN (icode) (target, op0, op1);
21099 case IX86_BUILTIN_EXTRQI:
21100 icode = CODE_FOR_sse4a_extrqi;
21101 arg0 = CALL_EXPR_ARG (exp, 0);
21102 arg1 = CALL_EXPR_ARG (exp, 1);
21103 arg2 = CALL_EXPR_ARG (exp, 2);
21104 op0 = expand_normal (arg0);
21105 op1 = expand_normal (arg1);
21106 op2 = expand_normal (arg2);
21107 tmode = insn_data[icode].operand[0].mode;
21108 mode1 = insn_data[icode].operand[1].mode;
21109 mode2 = insn_data[icode].operand[2].mode;
21110 mode3 = insn_data[icode].operand[3].mode;
21111 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21112 op0 = copy_to_mode_reg (mode1, op0);
21113 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21115 error ("index mask must be an immediate");
21116 return gen_reg_rtx (tmode);
21118 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21120 error ("length mask must be an immediate");
21121 return gen_reg_rtx (tmode);
21123 if (optimize || target == 0
21124 || GET_MODE (target) != tmode
21125 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21126 target = gen_reg_rtx (tmode);
21127 pat = GEN_FCN (icode) (target, op0, op1, op2);
21133 case IX86_BUILTIN_INSERTQI:
21134 icode = CODE_FOR_sse4a_insertqi;
21135 arg0 = CALL_EXPR_ARG (exp, 0);
21136 arg1 = CALL_EXPR_ARG (exp, 1);
21137 arg2 = CALL_EXPR_ARG (exp, 2);
21138 arg3 = CALL_EXPR_ARG (exp, 3);
21139 op0 = expand_normal (arg0);
21140 op1 = expand_normal (arg1);
21141 op2 = expand_normal (arg2);
21142 op3 = expand_normal (arg3);
21143 tmode = insn_data[icode].operand[0].mode;
21144 mode1 = insn_data[icode].operand[1].mode;
21145 mode2 = insn_data[icode].operand[2].mode;
21146 mode3 = insn_data[icode].operand[3].mode;
21147 mode4 = insn_data[icode].operand[4].mode;
21149 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21150 op0 = copy_to_mode_reg (mode1, op0);
21152 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21153 op1 = copy_to_mode_reg (mode2, op1);
21155 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21157 error ("index mask must be an immediate");
21158 return gen_reg_rtx (tmode);
21160 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21162 error ("length mask must be an immediate");
21163 return gen_reg_rtx (tmode);
21165 if (optimize || target == 0
21166 || GET_MODE (target) != tmode
21167 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21168 target = gen_reg_rtx (tmode);
21169 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21175 case IX86_BUILTIN_VEC_INIT_V2SI:
21176 case IX86_BUILTIN_VEC_INIT_V4HI:
21177 case IX86_BUILTIN_VEC_INIT_V8QI:
21178 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21180 case IX86_BUILTIN_VEC_EXT_V2DF:
21181 case IX86_BUILTIN_VEC_EXT_V2DI:
21182 case IX86_BUILTIN_VEC_EXT_V4SF:
21183 case IX86_BUILTIN_VEC_EXT_V4SI:
21184 case IX86_BUILTIN_VEC_EXT_V8HI:
21185 case IX86_BUILTIN_VEC_EXT_V2SI:
21186 case IX86_BUILTIN_VEC_EXT_V4HI:
21187 case IX86_BUILTIN_VEC_EXT_V16QI:
21188 return ix86_expand_vec_ext_builtin (exp, target);
21190 case IX86_BUILTIN_VEC_SET_V2DI:
21191 case IX86_BUILTIN_VEC_SET_V4SF:
21192 case IX86_BUILTIN_VEC_SET_V4SI:
21193 case IX86_BUILTIN_VEC_SET_V8HI:
21194 case IX86_BUILTIN_VEC_SET_V4HI:
21195 case IX86_BUILTIN_VEC_SET_V16QI:
21196 return ix86_expand_vec_set_builtin (exp);
21198 case IX86_BUILTIN_INFQ:
21200 REAL_VALUE_TYPE inf;
21204 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21206 tmp = validize_mem (force_const_mem (mode, tmp));
21209 target = gen_reg_rtx (mode);
21211 emit_move_insn (target, tmp);
21215 case IX86_BUILTIN_FABSQ:
21216 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21218 case IX86_BUILTIN_COPYSIGNQ:
21219 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21225 for (i = 0, d = bdesc_sse_3arg;
21226 i < ARRAY_SIZE (bdesc_sse_3arg);
21228 if (d->code == fcode)
21229 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21232 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21233 if (d->code == fcode)
21235 /* Compares are treated specially. */
21236 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21237 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21238 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21239 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21240 return ix86_expand_sse_compare (d, exp, target);
21242 return ix86_expand_binop_builtin (d->icode, exp, target);
21245 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21246 if (d->code == fcode)
21247 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21249 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21250 if (d->code == fcode)
21251 return ix86_expand_sse_comi (d, exp, target);
21253 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21254 if (d->code == fcode)
21255 return ix86_expand_sse_ptest (d, exp, target);
21257 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21258 if (d->code == fcode)
21259 return ix86_expand_crc32 (d->icode, exp, target);
21261 for (i = 0, d = bdesc_pcmpestr;
21262 i < ARRAY_SIZE (bdesc_pcmpestr);
21264 if (d->code == fcode)
21265 return ix86_expand_sse_pcmpestr (d, exp, target);
21267 for (i = 0, d = bdesc_pcmpistr;
21268 i < ARRAY_SIZE (bdesc_pcmpistr);
21270 if (d->code == fcode)
21271 return ix86_expand_sse_pcmpistr (d, exp, target);
21273 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21274 if (d->code == fcode)
21275 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21276 (enum multi_arg_type)d->flag,
21279 gcc_unreachable ();
21282 /* Returns a function decl for a vectorized version of the builtin function
21283 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21284 if it is not available. */
21287 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21290 enum machine_mode in_mode, out_mode;
/* Both the argument and the result must be vector types; scalar types
   cannot be mapped onto the SSE builtins below.  */
21293 if (TREE_CODE (type_out) != VECTOR_TYPE
21294 || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element mode and lane count of the result and the input vectors.  */
21297 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21298 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21299 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21300 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Map the scalar math builtin FN to the matching x86 vector builtin,
   but only when element modes and lane counts line up exactly.  */
21304 case BUILT_IN_SQRT:
/* sqrt on v2df -> SQRTPD.  */
21305 if (out_mode == DFmode && out_n == 2
21306 && in_mode == DFmode && in_n == 2)
21307 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21310 case BUILT_IN_SQRTF:
/* sqrtf on v4sf -> SQRTPS (the _NR variant).  */
21311 if (out_mode == SFmode && out_n == 4
21312 && in_mode == SFmode && in_n == 4)
21313 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21316 case BUILT_IN_LRINT:
/* lrint of v2df producing v4si goes through the pack-to-SFIX helper.  */
21317 if (out_mode == SImode && out_n == 4
21318 && in_mode == DFmode && in_n == 2)
21319 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21322 case BUILT_IN_LRINTF:
/* lrintf of v4sf producing v4si maps directly to CVTPS2DQ.  */
21323 if (out_mode == SImode && out_n == 4
21324 && in_mode == SFmode && in_n == 4)
21325 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21332 /* Dispatch to a handler for a vectorization library. */
21333 if (ix86_veclib_handler)
21334 return (*ix86_veclib_handler)(fn, type_out, type_in);
21339 /* Handler for an ACML-style interface to a library with vectorized
/* Builds (and returns a FUNCTION_DECL for) an external vector-math routine
   following the ACML naming scheme.  The template "__vr.._" gets its two
   dots replaced with the vector length / type letters and the scalar
   builtin's name appended — see the sprintf below.  */
21343 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21345 char name[20] = "__vr.._";
21346 tree fntype, new_fndecl, args;
21349 enum machine_mode el_mode, in_mode;
21352 /* The ACML is 64bits only and suitable for unsafe math only as
21353 it does not correctly support parts of IEEE with the required
21354 precision such as denormals. */
21356 || !flag_unsafe_math_optimizations)
/* Element mode and lane count must agree between input and output.  */
21359 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21360 n = TYPE_VECTOR_SUBPARTS (type_out);
21361 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21362 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21363 if (el_mode != in_mode
/* Double-precision entry points.  */
21373 case BUILT_IN_LOG2:
21374 case BUILT_IN_LOG10:
21377 if (el_mode != DFmode
/* Single-precision entry points.  */
21382 case BUILT_IN_SINF:
21383 case BUILT_IN_COSF:
21384 case BUILT_IN_EXPF:
21385 case BUILT_IN_POWF:
21386 case BUILT_IN_LOGF:
21387 case BUILT_IN_LOG2F:
21388 case BUILT_IN_LOG10F:
21391 if (el_mode != SFmode
/* bname+10 skips the "__builtin_" prefix of the scalar builtin's name;
   the result is written at name+7, i.e. right after the "__vr.._"
   template prefix.  */
21400 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21401 sprintf (name + 7, "%s", bname+10);
/* Count the scalar builtin's arguments to pick a one- or two-argument
   vector function type below.  */
21404 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21405 args = TREE_CHAIN (args))
21409 fntype = build_function_type_list (type_out, type_in, NULL);
21411 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21413 /* Build a function declaration for the vectorized function. */
21414 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21415 TREE_PUBLIC (new_fndecl) = 1;
21416 DECL_EXTERNAL (new_fndecl) = 1;
/* The library routine is treated as pure math: no virtual operands,
   safe to treat as readonly for optimization purposes.  */
21417 DECL_IS_NOVOPS (new_fndecl) = 1;
21418 TREE_READONLY (new_fndecl) = 1;
21424 /* Returns a decl of a function that implements conversion of the
21425 input vector of type TYPE, or NULL_TREE if it is not available. */
/* CODE is the tree conversion code (e.g. FIX_TRUNC_EXPR below);
   dispatch on it and then on the vector mode of TYPE.  */
21428 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21430 if (TREE_CODE (type) != VECTOR_TYPE)
21436 switch (TYPE_MODE (type))
/* int vector -> float vector: CVTDQ2PS.  */
21439 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21444 case FIX_TRUNC_EXPR:
21445 switch (TYPE_MODE (type))
/* float vector -> int vector with truncation: CVTTPS2DQ.  */
21448 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21458 /* Returns a code for a target-specific builtin that implements
21459 reciprocal of the function, or NULL_TREE if not available. */
21462 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21463 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only usable when the user asked for them
   (-mrecip) and the fast-math flags make the lower precision acceptable.  */
21465 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21466 && flag_finite_math_only && !flag_trapping_math
21467 && flag_unsafe_math_optimizations))
21471 /* Machine dependent builtins. */
21474 /* Vectorized version of sqrt to rsqrt conversion. */
21475 case IX86_BUILTIN_SQRTPS_NR:
21476 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21482 /* Normal builtins. */
21485 /* Sqrt to rsqrt conversion. */
21486 case BUILT_IN_SQRTF:
21487 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21494 /* Store OPERAND to the memory after reload is completed. This means
21495 that we can't easily use assign_stack_local. */
/* Returns a MEM rtx addressing the stored value.  Three strategies:
   red zone below the stack pointer, 64-bit push, or 32-bit push(es).  */
21497 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21501 gcc_assert (reload_completed);
21502 if (TARGET_RED_ZONE)
/* With a red zone we may scribble below the stack pointer without
   adjusting it.  */
21504 result = gen_rtx_MEM (mode,
21505 gen_rtx_PLUS (Pmode,
21507 GEN_INT (-RED_ZONE_SIZE)));
21508 emit_move_insn (result, operand);
21510 else if (!TARGET_RED_ZONE && TARGET_64BIT)
/* 64-bit, no red zone: push the value as DImode.  */
21516 operand = gen_lowpart (DImode, operand);
/* Emit a push via a store to (pre_dec (sp)).  */
21520 gen_rtx_SET (VOIDmode,
21521 gen_rtx_MEM (DImode,
21522 gen_rtx_PRE_DEC (DImode,
21523 stack_pointer_rtx)),
21527 gcc_unreachable ();
21529 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: a DImode value is split into two SImode halves and
   pushed high half first.  */
21538 split_di (&operand, 1, operands, operands + 1);
21540 gen_rtx_SET (VOIDmode,
21541 gen_rtx_MEM (SImode,
21542 gen_rtx_PRE_DEC (Pmode,
21543 stack_pointer_rtx)),
21546 gen_rtx_SET (VOIDmode,
21547 gen_rtx_MEM (SImode,
21548 gen_rtx_PRE_DEC (Pmode,
21549 stack_pointer_rtx)),
21554 /* Store HImodes as SImodes. */
21555 operand = gen_lowpart (SImode, operand);
21559 gen_rtx_SET (VOIDmode,
21560 gen_rtx_MEM (GET_MODE (operand),
21561 gen_rtx_PRE_DEC (SImode,
21562 stack_pointer_rtx)),
21566 gcc_unreachable ();
/* The pushed value now lives at the (new) top of stack.  */
21568 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21573 /* Free operand from the memory. */
/* Releases the stack space that ix86_force_to_memory pushed for MODE.
   No-op when the red zone was used, since the stack pointer was never
   adjusted in that case.  */
21575 ix86_free_from_memory (enum machine_mode mode)
21577 if (!TARGET_RED_ZONE)
21581 if (mode == DImode || TARGET_64BIT)
21585 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21586 to pop or add instruction if registers are available. */
21587 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21588 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21593 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21594 QImode must go into class Q_REGS.
21595 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21596 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given value X and candidate class
   REGCLASS, return the (sub)class reload should actually use, or NO_REGS
   to force X into memory instead.  */
21598 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21600 enum machine_mode mode = GET_MODE (x);
21602 /* We're only allowed to return a subclass of CLASS. Many of the
21603 following checks fail for NO_REGS, so eliminate that early. */
21604 if (regclass == NO_REGS)
21607 /* All classes can load zeros. */
21608 if (x == CONST0_RTX (mode))
21611 /* Force constants into memory if we are loading a (nonzero) constant into
21612 an MMX or SSE register. This is because there are no MMX/SSE instructions
21613 to load from a constant. */
21615 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21618 /* Prefer SSE regs only, if we can use them for math. */
21619 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21620 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21622 /* Floating-point constants need more complex checks. */
21623 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21625 /* General regs can load everything. */
21626 if (reg_class_subset_p (regclass, GENERAL_REGS))
21629 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21630 zero above. We only want to wind up preferring 80387 registers if
21631 we plan on doing computation with them. */
21633 && standard_80387_constant_p (x))
21635 /* Limit class to non-sse. */
21636 if (regclass == FLOAT_SSE_REGS)
21638 if (regclass == FP_TOP_SSE_REGS)
21640 if (regclass == FP_SECOND_SSE_REGS)
21641 return FP_SECOND_REG;
21642 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21649 /* Generally when we see PLUS here, it's the function invariant
21650 (plus soft-fp const_int). Which can only be computed into general
21652 if (GET_CODE (x) == PLUS)
21653 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21655 /* QImode constants are easy to load, but non-constant QImode data
21656 must go into Q_REGS. */
21657 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21659 if (reg_class_subset_p (regclass, Q_REGS))
21661 if (reg_class_subset_p (Q_REGS, regclass))
21669 /* Discourage putting floating-point values in SSE registers unless
21670 SSE math is being used, and likewise for the 387 registers. */
/* Implements PREFERRED_OUTPUT_RELOAD_CLASS.  */
21672 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21674 enum machine_mode mode = GET_MODE (x);
21676 /* Restrict the output reload class to the register bank that we are doing
21677 math on. If we would like not to return a subset of CLASS, reject this
21678 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): this re-assignment duplicates the initializer of MODE
   above — it looks redundant; candidate cleanup, verify no elided code
   changed X in between.  */
21679 mode = GET_MODE (x);
21680 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21681 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87 math, strip the SSE half out of combined classes.  */
21683 if (X87_FLOAT_MODE_P (mode))
21685 if (regclass == FP_TOP_SSE_REGS)
21687 else if (regclass == FP_SECOND_SSE_REGS)
21688 return FP_SECOND_REG;
21690 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21696 /* If we are copying between general and FP registers, we need a memory
21697 location. The same is true for SSE and MMX registers.
21699 To optimize register_move_cost performance, allow inline variant.
21701 The macro can't work reliably when one of the CLASSES is class containing
21702 registers from multiple units (SSE, MMX, integer). We avoid this by never
21703 combining those units in single alternative in the machine description.
21704 Ensure that this constraint holds to avoid unexpected surprises.
21706 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21707 enforce these sanity checks. */
21710 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21711 enum machine_mode mode, int strict)
/* Mixed-unit classes violate the invariant described above; assert in
   strict mode, otherwise conservatively report memory as needed.  */
21713 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21714 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21715 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21716 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21717 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21718 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
21720 gcc_assert (!strict);
/* x87 <-> anything-else always goes through memory.  */
21724 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21727 /* ??? This is a lie. We do have moves between mmx/general, and for
21728 mmx/sse2. But by saying we need secondary memory we discourage the
21729 register allocator from using the mmx registers unless needed. */
21730 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21733 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21735 /* SSE1 doesn't have any direct moves from other classes. */
21739 /* If the target says that inter-unit moves are more expensive
21740 than moving through memory, then don't generate them. */
21741 if (!TARGET_INTER_UNIT_MOVES)
21744 /* Between SSE and general, we have moves no larger than word size. */
21745 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line entry point for inline_secondary_memory_needed; see the
   comment on that function for the semantics of the arguments.  */
21753 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21754 enum machine_mode mode, int strict)
21756 return inline_secondary_memory_needed (class1, class2, mode, strict);
21759 /* Return true if the registers in CLASS cannot represent the change from
21760 modes FROM to TO. */
21763 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
21764 enum reg_class regclass)
21769 /* x87 registers can't do subreg at all, as all values are reformatted
21770 to extended precision. */
21771 if (MAYBE_FLOAT_CLASS_P (regclass))
21774 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21776 /* Vector registers do not support QI or HImode loads. If we don't
21777 disallow a change to these modes, reload will assume it's ok to
21778 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21779 the vec_dupv4hi pattern. */
21780 if (GET_MODE_SIZE (from) < 4)
21783 /* Vector registers do not support subreg with nonzero offsets, which
21784 are otherwise valid for integer registers. Since we can't see
21785 whether we have a nonzero offset from here, prohibit all
21786 nonparadoxical subregs changing size. */
21787 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
21794 /* Return the cost of moving data of mode M between a
21795 register and memory. A value of 2 is the default; this cost is
21796 relative to those in `REGISTER_MOVE_COST'.
21798 This function is used extensively by register_move_cost that is used to
21799 build tables at startup. Make it inline in this case.
21800 When IN is 2, return maximum of in and out move cost.
21802 If moving between registers and memory is more expensive than
21803 between two registers, you should define this macro to express the
21806 Model also increased moving costs of QImode registers in non
21810 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 register class: load/store costs indexed by mode.  */
21814 if (FLOAT_CLASS_P (regclass))
21832 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
21833 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE register class: cost table indexed by operand size.  */
21835 if (SSE_CLASS_P (regclass))
21838 switch (GET_MODE_SIZE (mode))
21853 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
21854 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX register class: cost table indexed by operand size.  */
21856 if (MMX_CLASS_P (regclass))
21859 switch (GET_MODE_SIZE (mode))
21871 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
21872 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, split out by operand size.  */
21874 switch (GET_MODE_SIZE (mode))
/* Byte operands: cheap when the register has a QI part (Q_CLASS_P or
   64-bit); otherwise model the extra cost of partial-register access.  */
21877 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21880 return ix86_cost->int_store[0];
21881 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
21882 cost = ix86_cost->movzbl_load;
21884 cost = ix86_cost->int_load[0];
21886 return MAX (cost, ix86_cost->int_store[0]);
21892 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
21894 return ix86_cost->movzbl_load;
21896 return ix86_cost->int_store[0] + 4;
/* Word-sized operands.  */
21901 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
21902 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
21904 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21905 if (mode == TFmode)
21908 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
21910 cost = ix86_cost->int_load[2];
21912 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words, rounding up.  */
21913 return (cost * (((int) GET_MODE_SIZE (mode)
21914 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line entry point for inline_memory_move_cost; see the comment
   on that function for the meaning of IN (0 = store, 1 = load,
   2 = max of both).  */
21919 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
21921 return inline_memory_move_cost (mode, regclass, in);
21925 /* Return the cost of moving data from a register in class CLASS1 to
21926 one in class CLASS2.
21928 It is not required that the cost always equal 2 when FROM is the same as TO;
21929 on some machines it is expensive to move between registers if they are not
21930 general registers. */
21933 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
21934 enum reg_class class2)
21936 /* In case we require secondary memory, compute cost of the store followed
21937 by load. In order to avoid bad register allocation choices, we need
21938 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21940 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN argument 2 asks for the max of the load and store costs.  */
21944 cost += inline_memory_move_cost (mode, class1, 2);
21945 cost += inline_memory_move_cost (mode, class2, 2);
21947 /* In case of copying from general_purpose_register we may emit multiple
21948 stores followed by single load causing memory size mismatch stall.
21949 Count this as arbitrarily high cost of 20. */
21950 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
21953 /* In the case of FP/MMX moves, the registers actually overlap, and we
21954 have to switch modes in order to treat them differently. */
21955 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21956 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21962 /* Moves between SSE/MMX and integer unit are expensive. */
21963 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
21964 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21966 /* ??? By keeping returned value relatively high, we limit the number
21967 of moves between integer and MMX/SSE registers for all targets.
21968 Additionally, high value prevents problem with x86_modes_tieable_p(),
21969 where integer modes in MMX/SSE registers are not tieable
21970 because of missing QImode and HImode moves to, from or between
21971 MMX/SSE registers. */
21972 return MAX (ix86_cost->mmxsse_to_integer, 8);
/* Fall back to the per-unit move cost of the source class.  */
21974 if (MAYBE_FLOAT_CLASS_P (class1))
21975 return ix86_cost->fp_move;
21976 if (MAYBE_SSE_CLASS_P (class1))
21977 return ix86_cost->sse_move;
21978 if (MAYBE_MMX_CLASS_P (class1))
21979 return ix86_cost->mmx_move;
21983 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): extract elides interior lines; comments only added.
   Classifies REGNO by register file and admits the mode sets each file
   can actually hold/move.  */
21986 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
21988 /* Flags and only flags can only hold CCmode values. */
21989 if (CC_REGNO_P (regno))
21990 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, no other register may hold CC/random/partial-int modes.  */
21991 if (GET_MODE_CLASS (mode) == MODE_CC
21992 || GET_MODE_CLASS (mode) == MODE_RANDOM
21993 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21995 if (FP_REGNO_P (regno))
21996 return VALID_FP_MODE_P (mode);
21997 if (SSE_REGNO_P (regno))
21999 /* We implement the move patterns for all vector modes into and
22000 out of SSE registers, even when no operation instructions
22002 return (VALID_SSE_REG_MODE (mode)
22003 || VALID_SSE2_REG_MODE (mode)
22004 || VALID_MMX_REG_MODE (mode)
22005 || VALID_MMX_REG_MODE_3DNOW (mode));
22007 if (MMX_REGNO_P (regno))
22009 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22010 so if the register is available at all, then we can move data of
22011 the given mode into or out of it. */
22012 return (VALID_MMX_REG_MODE (mode)
22013 || VALID_MMX_REG_MODE_3DNOW (mode));
/* From here on REGNO is a general purpose register.  */
22016 if (mode == QImode)
22018 /* Take care for QImode values - they can be in non-QI regs,
22019 but then they do cause partial register stalls. */
22020 if (regno < 4 || TARGET_64BIT)
22022 if (!TARGET_PARTIAL_REG_STALL)
/* During/after reload we must accept QImode in any GPR.  */
22024 return reload_in_progress || reload_completed;
22026 /* We handle both integer and floats in the general purpose registers. */
22027 else if (VALID_INT_MODE_P (mode))
22029 else if (VALID_FP_MODE_P (mode))
22031 else if (VALID_DFP_MODE_P (mode))
22033 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22034 on to use that value in smaller contexts, this can easily force a
22035 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22036 supporting DImode, allow it. */
22037 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22043 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22044 tieable integer mode. */
/* NOTE(review): the switch over MODE is elided in this extract; the two
   visible returns presumably correspond to the HI/QI and SI/DI cases --
   confirm against the full source.  */
22047 ix86_tieable_integer_mode_p (enum machine_mode mode)
22056 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22059 return TARGET_64BIT;
22066 /* Return true if MODE1 is accessible in a register that can hold MODE2
22067 without copying. That is, all register classes that can hold MODE2
22068 can also hold MODE1. */
/* NOTE(review): extract elides the trivially-true early returns;
   comments only added.  */
22071 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22073 if (mode1 == mode2)
/* Two tieable integer modes are always mutually tieable.  */
22076 if (ix86_tieable_integer_mode_p (mode1)
22077 && ix86_tieable_integer_mode_p (mode2))
22080 /* MODE2 being XFmode implies fp stack or general regs, which means we
22081 can tie any smaller floating point modes to it. Note that we do not
22082 tie this with TFmode. */
22083 if (mode2 == XFmode)
22084 return mode1 == SFmode || mode1 == DFmode;
22086 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22087 that we can tie it with SFmode. */
22088 if (mode2 == DFmode)
22089 return mode1 == SFmode;
22091 /* If MODE2 is only appropriate for an SSE register, then tie with
22092 any other mode acceptable to SSE registers. */
22093 if (GET_MODE_SIZE (mode2) == 16
22094 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22095 return (GET_MODE_SIZE (mode1) == 16
22096 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22098 /* If MODE2 is appropriate for an MMX register, then tie
22099 with any other mode acceptable to MMX registers. */
22100 if (GET_MODE_SIZE (mode2) == 8
22101 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22102 return (GET_MODE_SIZE (mode1) == 8
22103 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22108 /* Compute a (partial) cost for rtx X. Return true if the complete
22109 cost has been computed, and false if subexpressions should be
22110 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): extract elides the switch skeleton (case labels, braces,
   break/return statements); comments only added here.  */
22113 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22115 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22116 enum machine_mode mode = GET_MODE (x);
/* Constants: cheap if they fit an immediate; dearer otherwise.  */
22124 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22126 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" below compares a negated
   boolean against an enum value, so it is always true for any nonzero
   GET_CODE.  Presumably "GET_CODE (x) != LABEL_REF" was intended --
   confirm against upstream before changing.  */
22128 else if (flag_pic && SYMBOLIC_CONST (x)
22130 || (!GET_CODE (x) != LABEL_REF
22131 && (GET_CODE (x) != SYMBOL_REF
22132 || !SYMBOL_REF_LOCAL_P (x)))))
22139 if (mode == VOIDmode)
/* FP constants loadable by fld1/fldz etc. are nearly free.  */
22142 switch (standard_80387_constant_p (x))
22147 default: /* Other constants */
22152 /* Start with (MEM (SYMBOL_REF)), since that's where
22153 it'll probably end up. Add a penalty for size. */
22154 *total = (COSTS_N_INSNS (1)
22155 + (flag_pic != 0 && !TARGET_64BIT)
22156 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22162 /* The zero extensions is often completely free on x86_64, so make
22163 it as cheap as possible. */
22164 if (TARGET_64BIT && mode == DImode
22165 && GET_MODE (XEXP (x, 0)) == SImode)
22167 else if (TARGET_ZERO_EXTEND_WITH_AND)
22168 *total = ix86_cost->add;
22170 *total = ix86_cost->movzx;
22174 *total = ix86_cost->movsx;
/* Shifts: small constant left shifts may be done with lea.  */
22178 if (CONST_INT_P (XEXP (x, 1))
22179 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22181 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22184 *total = ix86_cost->add;
22187 if ((value == 2 || value == 3)
22188 && ix86_cost->lea <= ix86_cost->shift_const)
22190 *total = ix86_cost->lea;
/* 32-bit target shifting a DImode value: needs a double-word sequence.  */
22200 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22202 if (CONST_INT_P (XEXP (x, 1)))
22204 if (INTVAL (XEXP (x, 1)) > 32)
22205 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22207 *total = ix86_cost->shift_const * 2;
22211 if (GET_CODE (XEXP (x, 1)) == AND)
22212 *total = ix86_cost->shift_var * 2;
22214 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22219 if (CONST_INT_P (XEXP (x, 1)))
22220 *total = ix86_cost->shift_const;
22222 *total = ix86_cost->shift_var;
/* MULT: FP multiplies use the tuned fmul cost.  */
22227 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22229 /* ??? SSE scalar cost should be used here. */
22230 *total = ix86_cost->fmul;
22233 else if (X87_FLOAT_MODE_P (mode))
22235 *total = ix86_cost->fmul;
22238 else if (FLOAT_MODE_P (mode))
22240 /* ??? SSE vector cost should be used here. */
22241 *total = ix86_cost->fmul;
/* Integer multiply: cost depends on bit count of a constant operand.  */
22246 rtx op0 = XEXP (x, 0);
22247 rtx op1 = XEXP (x, 1);
22249 if (CONST_INT_P (XEXP (x, 1)))
22251 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Count the set bits (each clears the lowest set bit).  */
22252 for (nbits = 0; value != 0; value &= value - 1)
22256 /* This is arbitrary. */
22259 /* Compute costs correctly for widening multiplication. */
22260 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22261 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22262 == GET_MODE_SIZE (mode))
22264 int is_mulwiden = 0;
22265 enum machine_mode inner_mode = GET_MODE (op0);
22267 if (GET_CODE (op0) == GET_CODE (op1))
22268 is_mulwiden = 1, op1 = XEXP (op1, 0);
22269 else if (CONST_INT_P (op1))
/* A constant counts as widening if it fits the narrow mode with the
   matching signedness.  */
22271 if (GET_CODE (op0) == SIGN_EXTEND)
22272 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22275 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22279 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22282 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22283 + nbits * ix86_cost->mult_bit
22284 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
/* Division: FP and integer variants.  */
22293 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22294 /* ??? SSE cost should be used here. */
22295 *total = ix86_cost->fdiv;
22296 else if (X87_FLOAT_MODE_P (mode))
22297 *total = ix86_cost->fdiv;
22298 else if (FLOAT_MODE_P (mode))
22299 /* ??? SSE vector cost should be used here. */
22300 *total = ix86_cost->fdiv;
22302 *total = ix86_cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize lea-encodable shapes (base + index*scale + disp).  */
22306 if (GET_MODE_CLASS (mode) == MODE_INT
22307 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22309 if (GET_CODE (XEXP (x, 0)) == PLUS
22310 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22311 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22312 && CONSTANT_P (XEXP (x, 1)))
22314 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22315 if (val == 2 || val == 4 || val == 8)
22317 *total = ix86_cost->lea;
22318 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22319 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22321 *total += rtx_cost (XEXP (x, 1), outer_code);
22325 else if (GET_CODE (XEXP (x, 0)) == MULT
22326 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22328 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22329 if (val == 2 || val == 4 || val == 8)
22331 *total = ix86_cost->lea;
22332 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22333 *total += rtx_cost (XEXP (x, 1), outer_code);
22337 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22339 *total = ix86_cost->lea;
22340 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22341 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22342 *total += rtx_cost (XEXP (x, 1), outer_code);
/* PLUS/MINUS fallthrough: FP add costs.  */
22349 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22351 /* ??? SSE cost should be used here. */
22352 *total = ix86_cost->fadd;
22355 else if (X87_FLOAT_MODE_P (mode))
22357 *total = ix86_cost->fadd;
22360 else if (FLOAT_MODE_P (mode))
22362 /* ??? SSE vector cost should be used here. */
22363 *total = ix86_cost->fadd;
/* Logic ops on 32-bit DImode need two instructions; sub-costs for
   operands that are not already DImode are doubled.  */
22371 if (!TARGET_64BIT && mode == DImode)
22373 *total = (ix86_cost->add * 2
22374 + (rtx_cost (XEXP (x, 0), outer_code)
22375 << (GET_MODE (XEXP (x, 0)) != DImode))
22376 + (rtx_cost (XEXP (x, 1), outer_code)
22377 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG: FP negation is fchs.  */
22383 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22385 /* ??? SSE cost should be used here. */
22386 *total = ix86_cost->fchs;
22389 else if (X87_FLOAT_MODE_P (mode))
22391 *total = ix86_cost->fchs;
22394 else if (FLOAT_MODE_P (mode))
22396 /* ??? SSE vector cost should be used here. */
22397 *total = ix86_cost->fchs;
22403 if (!TARGET_64BIT && mode == DImode)
22404 *total = ix86_cost->add * 2;
22406 *total = ix86_cost->add;
/* COMPARE of a single extracted bit against zero.  */
22410 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22411 && XEXP (XEXP (x, 0), 1) == const1_rtx
22412 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22413 && XEXP (x, 1) == const0_rtx)
22415 /* This kind of construct is implemented using test[bwl].
22416 Treat it as if we had an AND. */
22417 *total = (ix86_cost->add
22418 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22419 + rtx_cost (const1_rtx, outer_code));
22425 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS: fabs for all FP flavors.  */
22430 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22431 /* ??? SSE cost should be used here. */
22432 *total = ix86_cost->fabs;
22433 else if (X87_FLOAT_MODE_P (mode))
22434 *total = ix86_cost->fabs;
22435 else if (FLOAT_MODE_P (mode))
22436 /* ??? SSE vector cost should be used here. */
22437 *total = ix86_cost->fabs;
22441 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22442 /* ??? SSE cost should be used here. */
22443 *total = ix86_cost->fsqrt;
22444 else if (X87_FLOAT_MODE_P (mode))
22445 *total = ix86_cost->fsqrt;
22446 else if (FLOAT_MODE_P (mode))
22447 /* ??? SSE vector cost should be used here. */
22448 *total = ix86_cost->fsqrt;
22452 if (XINT (x, 1) == UNSPEC_TP)
22463 static int current_machopic_label_num;
22465 /* Given a symbol name and its associated stub, write out the
22466 definition of the stub. */
/* NOTE(review): extract elides the #if TARGET_MACHO / MACHOPIC_ATOMIC
   conditionals around the alternative fprintf sequences; comments only
   added.  Emits a Mach-O lazy-binding stub for SYMB via STUB.  */
22469 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22471 unsigned int length;
22472 char *binder_name, *symbol_name, lazy_ptr_name[32];
22473 int label = ++current_machopic_label_num;
22475 /* For 64-bit we shouldn't get here. */
22476 gcc_assert (!TARGET_64BIT);
22478 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22479 symb = (*targetm.strip_name_encoding) (symb);
22481 length = strlen (stub);
22482 binder_name = alloca (length + 32);
22483 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22485 length = strlen (symb);
22486 symbol_name = alloca (length + 32);
22487 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22489 sprintf (lazy_ptr_name, "L%d$lz", label);
/* PIC vs non-PIC stub sections (the selecting condition is elided here).  */
22492 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22494 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22496 fprintf (file, "%s:\n", stub);
22497 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC path: materialize PC in %eax, load the lazy pointer, jump.  */
22501 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22502 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22503 fprintf (file, "\tjmp\t*%%edx\n");
22506 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22508 fprintf (file, "%s:\n", binder_name);
/* First call goes through the binder to resolve the symbol.  */
22512 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22513 fprintf (file, "\tpushl\t%%eax\n");
22516 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22518 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* The lazy pointer starts out pointing at the binder entry.  */
22520 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22521 fprintf (file, "%s:\n", lazy_ptr_name);
22522 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22523 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: defer to the generic
   Darwin end-of-file handling.  */
22527 darwin_x86_file_end (void)
22529 darwin_file_end ();
22532 #endif /* TARGET_MACHO */
22534 /* Order the registers for register allocator. */
/* NOTE(review): extract elides the declarations of i and pos;
   comments only added.  Fills reg_alloc_order[] with the preferred
   allocation order: call-used GPRs, call-saved GPRs, then x87/SSE/MMX
   depending on whether SSE math is in use.  */
22537 x86_order_regs_for_local_alloc (void)
22542 /* First allocate the local general purpose registers. */
22543 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22544 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22545 reg_alloc_order [pos++] = i;
22547 /* Global general purpose registers. */
22548 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22549 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22550 reg_alloc_order [pos++] = i;
22552 /* x87 registers come first in case we are doing FP math
22554 if (!TARGET_SSE_MATH)
22555 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22556 reg_alloc_order [pos++] = i;
22558 /* SSE registers. */
22559 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22560 reg_alloc_order [pos++] = i;
22561 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22562 reg_alloc_order [pos++] = i;
22564 /* x87 registers. */
22565 if (TARGET_SSE_MATH)
22566 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22567 reg_alloc_order [pos++] = i;
22569 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22570 reg_alloc_order [pos++] = i;
22572 /* Initialize the rest of array as we do not allocate some registers
22574 while (pos < FIRST_PSEUDO_REGISTER)
22575 reg_alloc_order [pos++] = 0;
22578 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22579 struct attribute_spec.handler. */
/* NOTE(review): extract elides the type-variable declaration and the
   branch taking TREE_TYPE for non-TYPE_DECL nodes; comments only added.  */
22581 ix86_handle_struct_attribute (tree *node, tree name,
22582 tree args ATTRIBUTE_UNUSED,
22583 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Resolve NODE to the underlying type the attribute applies to.  */
22586 if (DECL_P (*node))
22588 if (TREE_CODE (*node) == TYPE_DECL)
22589 type = &TREE_TYPE (*node);
/* The attribute only makes sense on struct/union types.  */
22594 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22595 || TREE_CODE (*type) == UNION_TYPE)))
22597 warning (OPT_Wattributes, "%qs attribute ignored",
22598 IDENTIFIER_POINTER (name));
22599 *no_add_attrs = true;
/* Reject ms_struct combined with gcc_struct (and vice versa).  */
22602 else if ((is_attribute_p ("ms_struct", name)
22603 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22604 || ((is_attribute_p ("gcc_struct", name)
22605 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22607 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22608 IDENTIFIER_POINTER (name));
22609 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bitfield layout: either the
   target default is MS layout and "gcc_struct" does not override it,
   or "ms_struct" explicitly requests it.  */
22616 ix86_ms_bitfield_layout_p (const_tree record_type)
22618 return (TARGET_MS_BITFIELD_LAYOUT &&
22619 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22620 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22623 /* Returns an expression indicating where the this parameter is
22624 located on entry to the FUNCTION. */
/* NOTE(review): extract elides the TARGET_64BIT guard and the fastcall
   regno adjustment; comments only added.  */
22627 x86_this_parameter (tree function)
22629 tree type = TREE_TYPE (function);
/* AGGR is 1 when the return value is passed via a hidden pointer,
   which shifts 'this' to the next parameter slot/register.  */
22630 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22634 const int *parm_regs;
22636 if (TARGET_64BIT_MS_ABI)
22637 parm_regs = x86_64_ms_abi_int_parameter_registers;
22639 parm_regs = x86_64_int_parameter_registers;
22640 return gen_rtx_REG (DImode, parm_regs[aggr]);
/* 32-bit regparm/fastcall: 'this' arrives in a register.  */
22643 if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
22645 int regno = AX_REG;
22646 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22648 return gen_rtx_REG (SImode, regno);
/* Otherwise 'this' is on the stack, above the return address.  */
22651 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22654 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): extract elides the actual return statements; comments
   only added.  Decides whether x86_output_mi_thunk can emit this thunk.  */
22657 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22658 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22659 HOST_WIDE_INT vcall_offset, const_tree function)
22661 /* 64-bit can handle anything. */
22665 /* For 32-bit, everything's fine if we have one free register. */
22666 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22669 /* Need a free register for vcall_offset. */
22673 /* Need a free register for GOT references. */
22674 if (flag_pic && !(*targetm.binds_local_p) (function))
22677 /* Otherwise ok. */
22681 /* Output the assembler code for a thunk function. THUNK_DECL is the
22682 declaration for the thunk function itself, FUNCTION is the decl for
22683 the target function. DELTA is an immediate constant offset to be
22684 added to THIS. If VCALL_OFFSET is nonzero, the word at
22685 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): extract elides TARGET_64BIT/TARGET_MACHO conditionals,
   some declarations and braces; comments only added.  */
22688 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
22689 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
22690 HOST_WIDE_INT vcall_offset, tree function)
22693 rtx this_param = x86_this_parameter (function);
22696 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22697 pull it in now and let DELTA benefit. */
22698 if (REG_P (this_param))
22699 this_reg = this_param;
22700 else if (vcall_offset)
22702 /* Put the this parameter into %eax. */
22703 xops[0] = this_param;
22704 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
22705 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22708 this_reg = NULL_RTX;
22710 /* Adjust the this parameter by a fixed constant. */
22713 xops[0] = GEN_INT (delta);
22714 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: a delta outside the signed-32 range must go via %r10.  */
22717 if (!x86_64_general_operand (xops[0], DImode))
22719 tmp = gen_rtx_REG (DImode, R10_REG);
22721 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
22723 xops[1] = this_param;
22725 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22728 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22731 /* Adjust the this parameter by a value stored in the vtable. */
/* Pick a scratch register that does not clash with parameter passing.  */
22735 tmp = gen_rtx_REG (DImode, R10_REG);
22738 int tmp_regno = CX_REG;
22739 if (lookup_attribute ("fastcall",
22740 TYPE_ATTRIBUTES (TREE_TYPE (function))))
22741 tmp_regno = AX_REG;
22742 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer out of *this.  */
22745 xops[0] = gen_rtx_MEM (Pmode, this_reg);
22748 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22750 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22752 /* Adjust the this parameter. */
22753 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* Huge vcall offsets need an extra scratch (%r11) on 64-bit.  */
22754 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
22756 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
22757 xops[0] = GEN_INT (vcall_offset);
22759 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22760 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
22762 xops[1] = this_reg;
22764 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22766 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22769 /* If necessary, drop THIS back to its stack slot. */
22770 if (this_reg && this_reg != this_param)
22772 xops[0] = this_reg;
22773 xops[1] = this_param;
22774 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real function.  */
22777 xops[0] = XEXP (DECL_RTL (function), 0);
22780 if (!flag_pic || (*targetm.binds_local_p) (function))
22781 output_asm_insn ("jmp\t%P0", xops);
22782 /* All thunks should be in the same object as their target,
22783 and thus binds_local_p should be true. */
22784 else if (TARGET_64BIT_MS_ABI)
22785 gcc_unreachable ();
/* 64-bit PIC: jump through the GOT entry.  */
22788 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
22789 tmp = gen_rtx_CONST (Pmode, tmp);
22790 tmp = gen_rtx_MEM (QImode, tmp);
22792 output_asm_insn ("jmp\t%A0", xops);
22797 if (!flag_pic || (*targetm.binds_local_p) (function))
22798 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump through the machopic stub.  */
22803 rtx sym_ref = XEXP (DECL_RTL (function), 0);
22804 tmp = (gen_rtx_SYMBOL_REF
22806 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
22807 tmp = gen_rtx_MEM (QImode, tmp);
22809 output_asm_insn ("jmp\t%0", xops);
22812 #endif /* TARGET_MACHO */
/* Generic 32-bit PIC: set up the GOT base in %ecx and jump via GOT.  */
22814 tmp = gen_rtx_REG (SImode, CX_REG);
22815 output_set_got (tmp, NULL_RTX);
22818 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
22819 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit the per-file assembler preamble
   (version/fltused directives, Intel-syntax switch when requested).
   NOTE(review): the TARGET_MACHO guard around darwin_file_start is
   elided in this extract.  */
22825 x86_file_start (void)
22827 default_file_start ();
22829 darwin_file_start ();
22831 if (X86_FILE_START_VERSION_DIRECTIVE)
22832 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22833 if (X86_FILE_START_FLTUSED)
22834 fputs ("\t.global\t__fltused\n", asm_out_file);
22835 if (ix86_asm_dialect == ASM_INTEL)
22836 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Compute the alignment for FIELD given the COMPUTED default.  On
   32-bit targets without -malign-double, cap double/integer fields
   at 32 bits (classic i386 ABI behavior).
   NOTE(review): the final return of COMPUTED is elided in this extract.  */
22840 x86_field_alignment (tree field, int computed)
22842 enum machine_mode mode;
22843 tree type = TREE_TYPE (field);
22845 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type determines the alignment class.  */
22847 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
22848 ? get_inner_array_type (type) : type);
22849 if (mode == DFmode || mode == DCmode
22850 || GET_MODE_CLASS (mode) == MODE_INT
22851 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22852 return MIN (32, computed);
22856 /* Output assembler code to FILE to increment profiler label # LABELNO
22857 for profiling a function entry. */
/* NOTE(review): extract elides the TARGET_64BIT/flag_pic #if structure
   separating the three call sequences; comments only added.  */
22859 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit: counter address in %r11, then call mcount (via GOT if PIC).  */
22863 #ifndef NO_PROFILE_COUNTERS
22864 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
22867 if (!TARGET_64BIT_MS_ABI && flag_pic)
22868 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
22870 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: address the counter GOT-relative off %ebx.  */
22874 #ifndef NO_PROFILE_COUNTERS
22875 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22876 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
22878 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute counter address and direct call.  */
22882 #ifndef NO_PROFILE_COUNTERS
22883 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
22884 PROFILE_COUNT_REGISTER);
22886 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22890 /* We don't have exact information about the insn sizes, but we may assume
22891 quite safely that we are informed about all 1 byte insns and memory
22892 address sizes. This is enough to eliminate unnecessary padding in
/* NOTE(review): extract elides several return statements and the
   declaration of L; comments only added.  Returns a conservative lower
   bound on the encoded size of INSN in bytes.  */
22896 min_insn_size (rtx insn)
22900 if (!INSN_P (insn) || !active_insn_p (insn))
22903 /* Discard alignments we've emit and jump instructions. */
22904 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22905 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
/* Jump tables (ADDR_VEC/ADDR_DIFF_VEC) are data, not code bytes.  */
22908 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
22909 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
22912 /* Important case - calls are always 5 bytes.
22913 It is common to have many calls in the row. */
22915 && symbolic_reference_mentioned_p (PATTERN (insn))
22916 && !SIBLING_CALL_P (insn))
22918 if (get_attr_length (insn) <= 1)
22921 /* For normal instructions we may rely on the sizes of addresses
22922 and the presence of symbol to require 4 bytes of encoding.
22923 This is not the case for jumps where references are PC relative. */
22924 if (!JUMP_P (insn))
22926 l = get_attr_length_address (insn);
22927 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22936 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* NOTE(review): extract elides the isjump declaration, several
   conditions and braces; comments only added.  Inserts padding so no
   16-byte window contains four jumps (K8 predictor limit).  */
22940 ix86_avoid_jump_misspredicts (void)
22942 rtx insn, start = get_insns ();
22943 int nbytes = 0, njumps = 0;
22946 /* Look for all minimal intervals of instructions containing 4 jumps.
22947 The intervals are bounded by START and INSN. NBYTES is the total
22948 size of instructions in the interval including INSN and not including
22949 START. When the NBYTES is smaller than 16 bytes, it is possible
22950 that the end of START and INSN ends up in the same 16byte page.
22952 The smallest offset in the page INSN can start is the case where START
22953 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22954 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
22956 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22959 nbytes += min_insn_size (insn);
22961 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
22962 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps only; jump tables don't occupy decode slots.  */
22964 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22965 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
/* Shrink the window from the left until at most 3 jumps remain.  */
22973 start = NEXT_INSN (start);
22974 if ((JUMP_P (start)
22975 && GET_CODE (PATTERN (start)) != ADDR_VEC
22976 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
22978 njumps--, isjump = 1;
22981 nbytes -= min_insn_size (start);
22983 gcc_assert (njumps >= 0);
22985 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22986 INSN_UID (start), INSN_UID (insn), nbytes);
/* Four jumps would share a 16-byte window: pad before INSN.  */
22988 if (njumps == 3 && isjump && nbytes < 16)
22990 int padsize = 15 - nbytes + min_insn_size (insn);
22993 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22994 INSN_UID (insn), padsize);
22995 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23000 /* AMD Athlon works faster
23001 when RET is not destination of conditional jump or directly preceded
23002 by other jump instruction. We avoid the penalty by inserting NOP just
23003 before the RET instructions in such cases. */
/* NOTE(review): extract elides the edge/edge_iterator declarations and
   several braces; comments only added.  Replaces RET preceded by a jump
   (or reached by a non-fallthru edge) with a padded long return.  */
23005 ix86_pad_returns (void)
23010 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23012 basic_block bb = e->src;
23013 rtx ret = BB_END (bb);
23015 bool replace = false;
23017 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23018 || !maybe_hot_bb_p (bb))
/* Find the nearest active insn or label before the return.  */
23020 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23021 if (active_insn_p (prev) || LABEL_P (prev))
23023 if (prev && LABEL_P (prev))
/* A labeled return reached by a jump edge needs the padding.  */
23028 FOR_EACH_EDGE (e, ei, bb->preds)
23029 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23030 && !(e->flags & EDGE_FALLTHRU))
23035 prev = prev_active_insn (ret);
23037 && ((JUMP_P (prev) && any_condjump_p (prev))
23040 /* Empty functions get branch mispredict even when the jump destination
23041 is not visible to us. */
23042 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23047 emit_insn_before (gen_return_internal_long (), ret);
23053 /* Implement machine specific optimizations. We implement padding of returns
23054 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the enclosing function header is elided in this extract;
   from the comment above, this is presumably the machine-reorg pass body
   (ix86_reorg) -- confirm against the full source.  Both fixups are
   tuning-gated and skipped at -Os.  */
23058 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23059 ix86_pad_returns ();
23060 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23061 ix86_avoid_jump_misspredicts ();
23064 /* Return nonzero when QImode register that must be represented via REX prefix
/* NOTE(review): extract elides the QI-mode operand check and the return
   statements; comments only added.  Scans the cached operands of INSN
   for a QImode register with regno >= 4 (needs REX encoding).  */
23067 x86_extended_QIreg_mentioned_p (rtx insn)
23070 extract_insn_cached (insn);
23071 for (i = 0; i < recog_data.n_operands; i++)
23072 if (REG_P (recog_data.operand[i])
23073 && REGNO (recog_data.operand[i]) >= 4)
23078 /* Return nonzero when P points to register encoded via REX prefix.
23079 Called via for_each_rtx. */
/* for_each_rtx callback: nonzero when *P is a REX-encoded register
   (extended integer or extended SSE regno).
   NOTE(review): the REG_P guard before reading REGNO is elided in this
   extract.  */
23081 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23083 unsigned int regno;
23086 regno = REGNO (*p);
23087 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23090 /* Return true when INSN mentions register that must be encoded using REX
/* True when INSN's pattern mentions any register requiring a REX prefix;
   walks the whole pattern via for_each_rtx.  */
23093 x86_extended_reg_mentioned_p (rtx insn)
23095 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23098 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23099 optabs would emit if we didn't have TFmode patterns. */
/* NOTE(review): extract elides the OUT assignment from operands[0];
   comments only added.  Unsigned int -> FP: nonnegative inputs use the
   signed conversion directly; negative (high-bit-set) inputs are halved
   with the low bit folded in, converted, then doubled.  */
23102 x86_emit_floatuns (rtx operands[2])
23104 rtx neglab, donelab, i0, i1, f0, in, out;
23105 enum machine_mode mode, inmode;
23107 inmode = GET_MODE (operands[1]);
23108 gcc_assert (inmode == SImode || inmode == DImode);
23111 in = force_reg (inmode, operands[1]);
23112 mode = GET_MODE (out);
23113 neglab = gen_label_rtx ();
23114 donelab = gen_label_rtx ();
23115 f0 = gen_reg_rtx (mode);
/* Branch to the slow path when IN looks negative as a signed value.  */
23117 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23119 expand_float (out, in, 0);
23121 emit_jump_insn (gen_jump (donelab));
23124 emit_label (neglab);
/* i0 = (in >> 1) | (in & 1); keeps the rounding bit while halving.  */
23126 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23128 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23130 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23132 expand_float (f0, i0, 0);
/* out = f0 + f0 restores the halved magnitude.  */
23134 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23136 emit_label (donelab);
23139 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23140 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): extract elides the switch over MODE (case labels,
   returns, wsmode/wvmode assignments); comments only added.  */
23143 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23144 rtx target, rtx val)
23146 enum machine_mode smode, wsmode, wvmode;
/* Simple case: the target supports VEC_DUPLICATE for this mode.  */
23161 val = force_reg (GET_MODE_INNER (mode), val);
23162 x = gen_rtx_VEC_DUPLICATE (mode, val);
23163 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* HImode element via SSE/3DNow!A: duplicate through an SImode truncate.  */
23169 if (TARGET_SSE || TARGET_3DNOW_A)
23171 val = gen_lowpart (SImode, val);
23172 x = gen_rtx_TRUNCATE (HImode, val);
23173 x = gen_rtx_VEC_DUPLICATE (mode, x);
23174 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23196 /* Extend HImode to SImode using a paradoxical SUBREG. */
23197 tmp1 = gen_reg_rtx (SImode);
23198 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23199 /* Insert the SImode value as low element of V4SImode vector. */
23200 tmp2 = gen_reg_rtx (V4SImode);
23201 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23202 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23203 CONST0_RTX (V4SImode),
23205 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23206 /* Cast the V4SImode vector back to a V8HImode vector. */
23207 tmp1 = gen_reg_rtx (V8HImode);
23208 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23209 /* Duplicate the low short through the whole low SImode word. */
23210 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23211 /* Cast the V8HImode vector back to a V4SImode vector. */
23212 tmp2 = gen_reg_rtx (V4SImode);
23213 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23214 /* Replicate the low element of the V4SImode vector. */
23215 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23216 /* Cast the V2SImode back to V8HImode, and store in target. */
23217 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23228 /* Extend QImode to SImode using a paradoxical SUBREG. */
23229 tmp1 = gen_reg_rtx (SImode);
23230 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23231 /* Insert the SImode value as low element of V4SImode vector. */
23232 tmp2 = gen_reg_rtx (V4SImode);
23233 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23234 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23235 CONST0_RTX (V4SImode),
23237 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23238 /* Cast the V4SImode vector back to a V16QImode vector. */
23239 tmp1 = gen_reg_rtx (V16QImode);
23240 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23241 /* Duplicate the low byte through the whole low SImode word. */
23242 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23243 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23244 /* Cast the V16QImode vector back to a V4SImode vector. */
23245 tmp2 = gen_reg_rtx (V4SImode);
23246 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23247 /* Replicate the low element of the V4SImode vector. */
23248 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23249 /* Cast the V2SImode back to V16QImode, and store in target. */
23250 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23258 /* Replicate the value once into the next wider mode and recurse. */
23259 val = convert_modes (wsmode, smode, val, true);
/* Shift the value into the high half and OR it with itself so both
   halves of the wider scalar hold a copy.  */
23260 x = expand_simple_binop (wsmode, ASHIFT, val,
23261 GEN_INT (GET_MODE_BITSIZE (smode)),
23262 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23263 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23265 x = gen_reg_rtx (wvmode);
23266 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23267 gcc_unreachable ();
23268 emit_move_insn (target, gen_lowpart (mode, x));
23276 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23277 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): this excerpt elides some original lines (gaps in the
   numbering), so not every branch/brace is visible here.
   MMX_OK presumably gates use of MMX modes — confirm against the full
   function.  ONE_VAR is the index of the single nonzero element.  */
23281 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23282 rtx target, rtx var, int one_var)
23284 enum machine_mode vsimode;
/* Two-element case: VEC_CONCAT the variable element with a zero.  */
23300 var = force_reg (GET_MODE_INNER (mode), var);
23301 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23302 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Use a fresh pseudo when TARGET is a hard register, so the shuffle
   below operates on a register the RA can handle.  */
23307 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23308 new_target = gen_reg_rtx (mode);
23310 new_target = target;
/* Broadcast VAR, then merge with zero keeping only element 0.  */
23311 var = force_reg (GET_MODE_INNER (mode), var);
23312 x = gen_rtx_VEC_DUPLICATE (mode, var);
23313 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23314 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23317 /* We need to shuffle the value to the correct position, so
23318 create a new pseudo to store the intermediate result. */
23320 /* With SSE2, we can use the integer shuffle insns. */
23321 if (mode != V4SFmode && TARGET_SSE2)
23323 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23325 GEN_INT (one_var == 1 ? 0 : 1),
23326 GEN_INT (one_var == 2 ? 0 : 1),
23327 GEN_INT (one_var == 3 ? 0 : 1)));
23328 if (target != new_target)
23329 emit_move_insn (target, new_target);
23333 /* Otherwise convert the intermediate result to V4SFmode and
23334 use the SSE1 shuffle instructions. */
23335 if (mode != V4SFmode)
23337 tmp = gen_reg_rtx (V4SFmode);
23338 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* SHUFPS selectors: low two indices pick from the first operand,
   the +4 forms pick from the second.  */
23343 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23345 GEN_INT (one_var == 1 ? 0 : 1),
23346 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23347 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23349 if (mode != V4SFmode)
23350 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23351 else if (tmp != target)
23352 emit_move_insn (target, tmp);
23354 else if (target != new_target)
23355 emit_move_insn (target, new_target);
/* Narrow modes: pick the SImode vector mode of matching width ...  */
23360 vsimode = V4SImode;
23366 vsimode = V2SImode;
23372 /* Zero extend the variable element to SImode and recurse. */
23373 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23375 x = gen_reg_rtx (vsimode);
23376 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23378 gcc_unreachable ();
/* ... and view the wider result in the requested mode.  */
23380 emit_move_insn (target, gen_lowpart (mode, x));
23388 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23389 consisting of the values in VALS. It is known that all elements
23390 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): interior lines are elided in this excerpt (numbering
   gaps); the overall shape is: load the all-constant vector from the
   pool, then overwrite element ONE_VAR with VAR.  */
23393 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23394 rtx target, rtx vals, int one_var)
23396 rtx var = XVECEXP (vals, 0, one_var);
23397 enum machine_mode wmode;
/* Build a copy of VALS with the variable slot zeroed, usable as a
   CONST_VECTOR pool constant.  */
23400 const_vec = copy_rtx (vals);
23401 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23402 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23410 /* For the two element vectors, it's just as easy to use
23411 the general case. */
23427 /* There's no way to set one QImode entry easily. Combine
23428 the variable value with its adjacent constant value, and
23429 promote to an HImode set. */
23430 x = XVECEXP (vals, 0, one_var ^ 1)
/* Even index: variable byte goes in the high half of the HImode pair.  */
23433 var = convert_modes (HImode, QImode, var, true);
23434 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23435 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23436 x = GEN_INT (INTVAL (x) & 0xff);
/* Odd index: constant byte goes in the high half instead.  */
23440 var = convert_modes (HImode, QImode, var, true);
23441 x = gen_int_mode (INTVAL (x) << 8, HImode);
23443 if (x != const0_rtx)
23444 var = expand_simple_binop (HImode, IOR, var, x, var,
23445 1, OPTAB_LIB_WIDEN);
/* Perform the set in the wider (HImode-element) vector mode, then
   view the result back in MODE.  */
23447 x = gen_reg_rtx (wmode);
23448 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23449 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23451 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the constant vector, then insert VAR.  */
23458 emit_move_insn (target, const_vec);
23459 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23463 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23464 all values variable, and none identical. */
/* NOTE(review): this excerpt elides some lines (numbering gaps), so the
   switch skeleton and some braces are not visible here.  */
23467 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23468 rtx target, rtx vals)
23470 enum machine_mode half_mode = GET_MODE_INNER (mode);
23471 rtx op0 = NULL, op1 = NULL;
23472 bool use_vec_concat = false;
23478 if (!mmx_ok && !TARGET_SSE)
23484 /* For the two element vectors, we always implement VEC_CONCAT. */
23485 op0 = XVECEXP (vals, 0, 0);
23486 op1 = XVECEXP (vals, 0, 1);
23487 use_vec_concat = true;
23491 half_mode = V2SFmode;
23494 half_mode = V2SImode;
23500 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23501 Recurse to load the two halves. */
23503 op0 = gen_reg_rtx (half_mode);
23504 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23505 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23507 op1 = gen_reg_rtx (half_mode);
23508 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23509 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23511 use_vec_concat = true;
23522 gcc_unreachable ();
23525 if (use_vec_concat)
23527 if (!register_operand (op0, half_mode))
23528 op0 = force_reg (half_mode, op0);
23529 if (!register_operand (op1, half_mode))
23530 op1 = force_reg (half_mode, op1);
23532 emit_insn (gen_rtx_SET (VOIDmode, target,
23533 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* Fallback: assemble the vector one machine word at a time.  */
23537 int i, j, n_elts, n_words, n_elt_per_word;
23538 enum machine_mode inner_mode;
23539 rtx words[4], shift;
23541 inner_mode = GET_MODE_INNER (mode);
23542 n_elts = GET_MODE_NUNITS (mode);
23543 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23544 n_elt_per_word = n_elts / n_words;
23545 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23547 for (i = 0; i < n_words; ++i)
23549 rtx word = NULL_RTX;
/* Pack the elements of word I from most- to least-significant,
   shifting previous contents up and OR-ing in each element.  */
23551 for (j = 0; j < n_elt_per_word; ++j)
23553 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23554 elt = convert_modes (word_mode, inner_mode, elt, true);
23560 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23561 word, 1, OPTAB_LIB_WIDEN);
23562 word = expand_simple_binop (word_mode, IOR, word, elt,
23563 word, 1, OPTAB_LIB_WIDEN);
23571 emit_move_insn (target, gen_lowpart (mode, words[0]));
23572 else if (n_words == 2)
/* Clobber first so the two subword stores are not seen as a
   partial def of an uninitialized pseudo.  */
23574 rtx tmp = gen_reg_rtx (mode);
23575 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23576 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23577 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23578 emit_move_insn (target, tmp);
23580 else if (n_words == 4)
/* Recurse as a V4SI build from the four words.  */
23582 rtx tmp = gen_reg_rtx (V4SImode);
23583 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23584 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23585 emit_move_insn (target, gen_lowpart (mode, tmp));
23588 gcc_unreachable ();
23592 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23593 instructions unless MMX_OK is true. */
/* Dispatcher: classify VALS (all-constant / all-same / one variable /
   general) and delegate to the matching specialized expander.
   NOTE(review): some lines are elided in this excerpt.  */
23596 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23598 enum machine_mode mode = GET_MODE (target);
23599 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23600 int n_elts = GET_MODE_NUNITS (mode);
23601 int n_var = 0, one_var = -1;
23602 bool all_same = true, all_const_zero = true;
/* Scan the elements, counting non-constants and remembering the last
   non-constant index in ONE_VAR.  */
23606 for (i = 0; i < n_elts; ++i)
23608 x = XVECEXP (vals, 0, i);
23609 if (!(CONST_INT_P (x)
23610 || GET_CODE (x) == CONST_DOUBLE
23611 || GET_CODE (x) == CONST_FIXED))
23612 n_var++, one_var = i;
23613 else if (x != CONST0_RTX (inner_mode))
23614 all_const_zero = false;
23615 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23619 /* Constants are best loaded from the constant pool. */
23622 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23626 /* If all values are identical, broadcast the value. */
23628 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23629 XVECEXP (vals, 0, 0)))
23632 /* Values where only one field is non-constant are best loaded from
23633 the pool and overwritten via move later. */
23637 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23638 XVECEXP (vals, 0, one_var),
23642 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Last resort: the fully general element-by-element expander.  */
23646 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  MMX_OK gates use
   of MMX-only strategies.  NOTE(review): the switch skeleton and several
   case labels are elided in this excerpt (numbering gaps).  */
23650 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23652 enum machine_mode mode = GET_MODE (target);
23653 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23654 bool use_vec_merge = false;
/* Two-element case: extract the other element and VEC_CONCAT it with
   VAL in the right order.  */
23663 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23664 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23666 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23668 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23669 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23675 use_vec_merge = TARGET_SSE4_1;
23683 /* For the two element vectors, we implement a VEC_CONCAT with
23684 the extraction of the other element. */
23686 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
23687 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
23690 op0 = val, op1 = tmp;
23692 op0 = tmp, op1 = val;
23694 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
23695 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23700 use_vec_merge = TARGET_SSE4_1;
23707 use_vec_merge = true;
/* V4SF element 1: build via unpcklps + recursive element-0 set +
   shufps, as traced in the A/B/C/D comments below.  */
23711 /* tmp = target = A B C D */
23712 tmp = copy_to_reg (target);
23713 /* target = A A B B */
23714 emit_insn (gen_sse_unpcklps (target, target, target));
23715 /* target = X A B B */
23716 ix86_expand_vector_set (false, target, val, 0);
23717 /* target = A X C D */
23718 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23719 GEN_INT (1), GEN_INT (0),
23720 GEN_INT (2+4), GEN_INT (3+4)));
23724 /* tmp = target = A B C D */
23725 tmp = copy_to_reg (target);
23726 /* tmp = X B C D */
23727 ix86_expand_vector_set (false, tmp, val, 0);
23728 /* target = A B X D */
23729 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23730 GEN_INT (0), GEN_INT (1),
23731 GEN_INT (0+4), GEN_INT (3+4)));
23735 /* tmp = target = A B C D */
23736 tmp = copy_to_reg (target);
23737 /* tmp = X B C D */
23738 ix86_expand_vector_set (false, tmp, val, 0);
23739 /* target = A B X D */
23740 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23741 GEN_INT (0), GEN_INT (1),
23742 GEN_INT (2+4), GEN_INT (0+4)));
23746 gcc_unreachable ();
23751 use_vec_merge = TARGET_SSE4_1;
23755 /* Element 0 handled by vec_merge below. */
23758 use_vec_merge = true;
23764 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23765 store into element 0, then shuffle them back. */
23769 order[0] = GEN_INT (elt);
23770 order[1] = const1_rtx;
23771 order[2] = const2_rtx;
23772 order[3] = GEN_INT (3);
23773 order[elt] = const0_rtx;
23775 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23776 order[1], order[2], order[3]));
23778 ix86_expand_vector_set (false, target, val, 0);
/* Same permutation is its own inverse (swap of 0 and ELT).  */
23780 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23781 order[1], order[2], order[3]));
23785 /* For SSE1, we have to reuse the V4SF code. */
23786 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
23787 gen_lowpart (SFmode, val), elt);
23792 use_vec_merge = TARGET_SSE2;
23795 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23799 use_vec_merge = TARGET_SSE4_1;
/* Common tail: emit (vec_merge (vec_duplicate VAL) TARGET (1<<ELT)).  */
23809 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
23810 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
23811 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: bounce through a stack slot and store the element there.  */
23815 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23817 emit_move_insn (mem, target);
23819 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23820 emit_move_insn (tmp, val);
23822 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  MMX_OK gates
   MMX-only strategies.  NOTE(review): switch skeleton and several case
   labels are elided in this excerpt.  */
23827 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
23829 enum machine_mode mode = GET_MODE (vec);
23830 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23831 bool use_vec_extr = false;
23844 use_vec_extr = true;
23848 use_vec_extr = TARGET_SSE4_1;
/* V4SF: shuffle the wanted element into position 0 first.  */
23860 tmp = gen_reg_rtx (mode);
23861 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
23862 GEN_INT (elt), GEN_INT (elt),
23863 GEN_INT (elt+4), GEN_INT (elt+4)));
/* High half via unpckhps when that is cheaper.  */
23867 tmp = gen_reg_rtx (mode);
23868 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
23872 gcc_unreachable ();
23875 use_vec_extr = true;
23880 use_vec_extr = TARGET_SSE4_1;
/* V4SI: replicate the wanted element to all lanes with pshufd.  */
23894 tmp = gen_reg_rtx (mode);
23895 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
23896 GEN_INT (elt), GEN_INT (elt),
23897 GEN_INT (elt), GEN_INT (elt)));
23901 tmp = gen_reg_rtx (mode);
23902 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
23906 gcc_unreachable ();
23909 use_vec_extr = true;
23914 /* For SSE1, we have to reuse the V4SF code. */
23915 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
23916 gen_lowpart (V4SFmode, vec), elt);
23922 use_vec_extr = TARGET_SSE2;
23925 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23929 use_vec_extr = TARGET_SSE4_1;
23933 /* ??? Could extract the appropriate HImode element and shift. */
/* Common tail: emit (vec_select VEC [ELT]).  */
23940 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
23941 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
23943 /* Let the rtl optimizers know about the zero extension performed. */
23944 if (inner_mode == QImode || inner_mode == HImode)
23946 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
23947 target = gen_lowpart (SImode, target);
23950 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill to a stack slot and load the element from memory.  */
23954 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23956 emit_move_insn (mem, vec);
23958 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23959 emit_move_insn (target, tmp);
23963 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
23964 pattern to reduce; DEST is the destination; IN is the input vector. */
/* Classic log2 reduction: combine high/low halves with MOVHLPS, then
   combine adjacent lanes via a SHUFPS broadcast of lane 1.
   NOTE(review): a few lines are elided in this excerpt.  */
23967 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
23969 rtx tmp1, tmp2, tmp3;
23971 tmp1 = gen_reg_rtx (V4SFmode);
23972 tmp2 = gen_reg_rtx (V4SFmode);
23973 tmp3 = gen_reg_rtx (V4SFmode);
/* tmp1 = { in[2], in[3], in[2], in[3] }; tmp2 = fn (tmp1, in).  */
23975 emit_insn (gen_sse_movhlps (tmp1, in, in));
23976 emit_insn (fn (tmp2, tmp1, in));
/* Broadcast element 1 of tmp2 and combine once more.  */
23978 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
23979 GEN_INT (1), GEN_INT (1),
23980 GEN_INT (1+4), GEN_INT (1+4)));
23981 emit_insn (fn (dest, tmp2, tmp3));
23984 /* Target hook for scalar_mode_supported_p. */
/* TFmode (__float128) is only supported for 64-bit targets; decimal FP
   and everything else defers as shown below (the decimal-FP return
   value line is elided in this excerpt).  */
23986 ix86_scalar_mode_supported_p (enum machine_mode mode)
23988 if (DECIMAL_FLOAT_MODE_P (mode))
23990 else if (mode == TFmode)
23991 return TARGET_64BIT;
23993 return default_scalar_mode_supported_p (mode);
23996 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported if any enabled ISA level (SSE, SSE2, MMX,
   3DNow!) validates it; return-true lines are elided in this excerpt.  */
23998 ix86_vector_mode_supported_p (enum machine_mode mode)
24000 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24002 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24004 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24006 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24011 /* Target hook for c_mode_for_suffix. */
/* Map a constant suffix letter to a machine mode: 'q' (64-bit only)
   and 'w' (with MMX); the returned-mode lines are elided here.  */
24012 static enum machine_mode
24013 ix86_c_mode_for_suffix (char suffix)
24015 if (TARGET_64BIT && suffix == 'q')
24017 if (TARGET_MMX && suffix == 'w')
24023 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24025 We do this in the new i386 backend to maintain source compatibility
24026 with the old cc0-based compiler. */
/* Implicitly add "flags" and "fpsr" to every asm's clobber list.  */
24029 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24030 tree inputs ATTRIBUTE_UNUSED,
24033 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24035 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24040 /* Implements target vector targetm.asm.encode_section_info. This
24041 is not used by netware. */
/* After the default encoding, mark static/external variables living in
   the large data section so references use far addressing.  */
24043 static void ATTRIBUTE_UNUSED
24044 ix86_encode_section_info (tree decl, rtx rtl, int first)
24046 default_encode_section_info (decl, rtl, first);
24048 if (TREE_CODE (decl) == VAR_DECL
24049 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24050 && ix86_in_large_data_p (decl))
24051 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24054 /* Worker function for REVERSE_CONDITION. */
/* FP compares must use the maybe-unordered reversal so NaN operands
   keep the jump semantics correct.  */
24057 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24059 return (mode != CCFPmode && mode != CCFPUmode
24060 ? reverse_condition (code)
24061 : reverse_condition_maybe_unordered (code));
24064 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template string; chooses popping (fstp) forms
   when the source register dies in INSN.  NOTE(review): some branches
   are elided in this excerpt.  */
24068 output_387_reg_move (rtx insn, rtx *operands)
24070 if (REG_P (operands[0]))
/* reg <- dying reg: pop the stack into the destination.  */
24072 if (REG_P (operands[1])
24073 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24075 if (REGNO (operands[0]) == FIRST_STACK_REG)
24076 return output_387_ffreep (operands, 0);
24077 return "fstp\t%y0";
24079 if (STACK_TOP_P (operands[0]))
24080 return "fld%z1\t%y1";
24083 else if (MEM_P (operands[0]))
24085 gcc_assert (REG_P (operands[1]));
24086 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24087 return "fstp%z0\t%y0";
24090 /* There is no non-popping store to memory for XFmode.
24091 So if we need one, follow the store with a load. */
24092 if (GET_MODE (operands[0]) == XFmode)
24093 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24095 return "fst%z0\t%y0";
24102 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24103 FP status register is set. */
24106 ix86_emit_fp_unordered_jump (rtx label)
24108 rtx reg = gen_reg_rtx (HImode);
/* Read the FPU status word into REG.  */
24111 emit_insn (gen_x86_fnstsw_1 (reg));
/* Fast path: SAHF copies AH into the flags, then test UNORDERED.  */
24113 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24115 emit_insn (gen_x86_sahf_1 (reg));
24117 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24118 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Otherwise test bit 2 (C2) of the status-word high byte directly.  */
24122 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24124 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24125 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24128 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24129 gen_rtx_LABEL_REF (VOIDmode, label),
24131 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24133 emit_jump_insn (temp);
/* The unordered case is assumed unlikely (10%).  */
24134 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24137 /* Output code to perform a log1p XFmode calculation. */
/* Uses fyl2xp1 for |op1| below ~(sqrt(2)/2 - 1/2) where it is accurate,
   and falls back to fyl2x on 1+op1 otherwise; both scaled by ln2.  */
24139 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24141 rtx label1 = gen_label_rtx ();
24142 rtx label2 = gen_label_rtx ();
24144 rtx tmp = gen_reg_rtx (XFmode);
24145 rtx tmp2 = gen_reg_rtx (XFmode);
/* if (|op1| >= 0.29289...) goto label1 (the fyl2x path).  */
24147 emit_insn (gen_absxf2 (tmp, op1));
24148 emit_insn (gen_cmpxf (tmp,
24149 CONST_DOUBLE_FROM_REAL_VALUE (
24150 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24152 emit_jump_insn (gen_bge (label1));
24154 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24155 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24156 emit_jump (label2);
/* Large-magnitude path: op0 = ln2 * log2 (1 + op1).  */
24158 emit_label (label1);
24159 emit_move_insn (tmp, CONST1_RTX (XFmode));
24160 emit_insn (gen_addxf3 (tmp, op1, tmp));
24161 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24162 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24164 emit_label (label2);
24167 /* Output code to perform a Newton-Rhapson approximation of a single precision
24168 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* RES = A / B approximated via one Newton-Raphson refinement of the
   hardware reciprocal estimate; MODE is SFmode or a vector thereof.  */
24170 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24172 rtx x0, x1, e0, e1, two;
24174 x0 = gen_reg_rtx (mode);
24175 e0 = gen_reg_rtx (mode);
24176 e1 = gen_reg_rtx (mode);
24177 x1 = gen_reg_rtx (mode);
/* Constant 2.0, broadcast to a vector when MODE is a vector mode.  */
24179 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24181 if (VECTOR_MODE_P (mode))
24182 two = ix86_build_const_vector (SFmode, true, two);
24184 two = force_reg (mode, two);
24186 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24188 /* x0 = rcp(b) estimate */
24189 emit_insn (gen_rtx_SET (VOIDmode, x0,
24190 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = x0 * b */
24193 emit_insn (gen_rtx_SET (VOIDmode, e0,
24194 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
24196 emit_insn (gen_rtx_SET (VOIDmode, e1,
24197 gen_rtx_MINUS (mode, two, e0)));
/* x1 = refined reciprocal = x0 * e1 */
24199 emit_insn (gen_rtx_SET (VOIDmode, x1,
24200 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
24202 emit_insn (gen_rtx_SET (VOIDmode, res,
24203 gen_rtx_MULT (mode, a, x1)));
24206 /* Output code to perform a Newton-Rhapson approximation of a
24207 single precision floating point [reciprocal] square root. */
/* RES = sqrt(A) or rsqrt(A) via one NR step on the rsqrt estimate.
   NOTE(review): the recip/sqrt selection flag parameter and some
   branches are elided in this excerpt.  */
24209 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24212 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24215 x0 = gen_reg_rtx (mode);
24216 e0 = gen_reg_rtx (mode);
24217 e1 = gen_reg_rtx (mode);
24218 e2 = gen_reg_rtx (mode);
24219 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 (vector-broadcast as needed).  */
24221 real_arithmetic (&r, NEGATE_EXPR, &dconst3, NULL);
24222 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24224 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24225 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24227 if (VECTOR_MODE_P (mode))
24229 mthree = ix86_build_const_vector (SFmode, true, mthree);
24230 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24233 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24234 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24236 /* x0 = rsqrt(a) estimate */
24237 emit_insn (gen_rtx_SET (VOIDmode, x0,
24238 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24241 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
24246 zero = gen_reg_rtx (mode);
24247 mask = gen_reg_rtx (mode);
/* mask = (a != 0) ? all-ones : 0; AND zeroes x0 where a == 0.  */
24249 zero = force_reg (mode, CONST0_RTX(mode));
24250 emit_insn (gen_rtx_SET (VOIDmode, mask,
24251 gen_rtx_NE (mode, zero, a)));
24253 emit_insn (gen_rtx_SET (VOIDmode, x0,
24254 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a;  e1 = e0 * x0 = a * x0^2.  */
24258 emit_insn (gen_rtx_SET (VOIDmode, e0,
24259 gen_rtx_MULT (mode, x0, a)));
24261 emit_insn (gen_rtx_SET (VOIDmode, e1,
24262 gen_rtx_MULT (mode, e0, x0)));
/* e2 = a * x0^2 - 3.0.  */
24265 mthree = force_reg (mode, mthree);
24266 emit_insn (gen_rtx_SET (VOIDmode, e2,
24267 gen_rtx_PLUS (mode, e1, mthree)));
24269 mhalf = force_reg (mode, mhalf);
24271 /* e3 = -.5 * x0 */
24272 emit_insn (gen_rtx_SET (VOIDmode, e3,
24273 gen_rtx_MULT (mode, x0, mhalf)));
24275 /* e3 = -.5 * e0 */
24276 emit_insn (gen_rtx_SET (VOIDmode, e3,
24277 gen_rtx_MULT (mode, e0, mhalf)));
24278 /* ret = e2 * e3 */
24279 emit_insn (gen_rtx_SET (VOIDmode, res,
24280 gen_rtx_MULT (mode, e2, e3)));
24283 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24285 static void ATTRIBUTE_UNUSED
24286 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24289 /* With Binutils 2.15, the "@unwind" marker must be specified on
24290 every occurrence of the ".eh_frame" section, not just the first
/* Emit the @unwind-annotated .section directive for .eh_frame ...  */
24293 && strcmp (name, ".eh_frame") == 0)
24295 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24296 flags & SECTION_WRITE ? "aw" : "a");
/* ... otherwise defer to the generic ELF handler.  */
24299 default_elf_asm_named_section (name, flags, decl);
24302 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Returns NULL (use default mangling) for anything that is not a
   void/bool/integer/real type; the return-string lines for the TFmode
   and XFmode cases are elided in this excerpt.  */
24304 static const char *
24305 ix86_mangle_type (const_tree type)
24307 type = TYPE_MAIN_VARIANT (type);
24309 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24310 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24313 switch (TYPE_MODE (type))
24316 /* __float128 is "g". */
24319 /* "long double" or __float80 is "e". */
24326 /* For 32-bit code we can save PIC register setup by using
24327 __stack_chk_fail_local hidden function instead of calling
24328 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24329 register, so it is better to call __stack_chk_fail directly. */
24332 ix86_stack_protect_fail (void)
24334 return TARGET_64BIT
24335 ? default_external_stack_protect_fail ()
24336 : default_hidden_stack_protect_fail ();
24339 /* Select a format to encode pointers in exception handling data. CODE
24340 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24341 true if the symbol may be affected by dynamic relocations.
24343 ??? All x86 object file formats are capable of representing this.
24344 After all, the relocation needed is the same as for the call insn.
24345 Whether or not a particular assembler allows us to enter such, I
24346 guess we'll have to see. */
24348 asm_preferred_eh_data_format (int code, int global)
/* PIC path (condition line elided here): pc-relative signed data,
   4-byte for small models, 8-byte otherwise; indirect if GLOBAL.  */
24352 int type = DW_EH_PE_sdata8;
24354 || ix86_cmodel == CM_SMALL_PIC
24355 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24356 type = DW_EH_PE_sdata4;
24357 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers unless a small model permits udata4.  */
24359 if (ix86_cmodel == CM_SMALL
24360 || (ix86_cmodel == CM_MEDIUM && code))
24361 return DW_EH_PE_udata4;
24362 return DW_EH_PE_absptr;
24365 /* Expand copysign from SIGN to the positive value ABS_VALUE
24366 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE | (SIGN & signbit-mask).  MASK, when supplied,
   is the (possibly inverted — see the NOT below) sign-bit mask.  */
24369 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24371 enum machine_mode mode = GET_MODE (sign);
24372 rtx sgn = gen_reg_rtx (mode);
24373 if (mask == NULL_RTX)
24375 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24376 if (!VECTOR_MODE_P (mode))
24378 /* We need to generate a scalar mode mask in this case. */
24379 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24380 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24381 mask = gen_reg_rtx (mode);
24382 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-supplied MASK has the sign bit clear, so invert it first.  */
24386 mask = gen_rtx_NOT (mode, mask);
24387 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24388 gen_rtx_AND (mode, mask, sign)));
24389 emit_insn (gen_rtx_SET (VOIDmode, result,
24390 gen_rtx_IOR (mode, abs_value, sgn)));
24393 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24394 mask for masking out the sign-bit is stored in *SMASK, if that is
/* Implements fabs as an AND with the inverted-sign-bit mask, so the
   same mask can be reused by copysign callers via *SMASK.  */
24397 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24399 enum machine_mode mode = GET_MODE (op0);
24402 xa = gen_reg_rtx (mode);
24403 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24404 if (!VECTOR_MODE_P (mode))
24406 /* We need to generate a scalar mode mask in this case. */
24407 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24408 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24409 mask = gen_reg_rtx (mode);
24410 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & ~signbit.  */
24412 emit_insn (gen_rtx_SET (VOIDmode, xa,
24413 gen_rtx_AND (mode, op0, mask)));
24421 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24422 swapping the operands if SWAP_OPERANDS is true. The expanded
24423 code is a forward jump to a newly created label in case the
24424 comparison is true. The generated label rtx is returned. */
24426 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24427 bool swap_operands)
/* Uses CCFPUmode so unordered (NaN) comparisons behave correctly.  */
24438 label = gen_label_rtx ();
24439 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24440 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24441 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24442 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24443 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24444 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24445 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24446 JUMP_LABEL (tmp) = label;
24451 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24452 using comparison code CODE. Operands are swapped for the comparison if
24453 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24455 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24456 bool swap_operands)
24458 enum machine_mode mode = GET_MODE (op0);
24459 rtx mask = gen_reg_rtx (mode);
/* CMPSD for DFmode, CMPSS for SFmode: produce an all-ones/all-zeros
   mask in MASK.  */
24468 if (mode == DFmode)
24469 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24470 gen_rtx_fmt_ee (code, mode, op0, op1)));
24472 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24473 gen_rtx_fmt_ee (code, mode, op0, op1)));
24478 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24479 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2**52 for DFmode, 2**23 for SFmode: the threshold above which every
   representable value is already an integer.  */
24481 ix86_gen_TWO52 (enum machine_mode mode)
24483 REAL_VALUE_TYPE TWO52r;
24486 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24487 TWO52 = const_double_from_real_value (TWO52r, mode);
24488 TWO52 = force_reg (mode, TWO52);
24493 /* Expand SSE sequence for computing lround from OP1 storing
24496 ix86_expand_lround (rtx op0, rtx op1)
24498 /* C code for the stuff we're doing below:
24499 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
/* Using nextafter(0.5, 0) instead of 0.5 avoids rounding 0.49999...
   (values exactly halfway after FP addition) the wrong way.  */
24502 enum machine_mode mode = GET_MODE (op1);
24503 const struct real_format *fmt;
24504 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24507 /* load nextafter (0.5, 0.0) */
24508 fmt = REAL_MODE_FORMAT (mode);
24509 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24510 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24512 /* adj = copysign (0.5, op1) */
24513 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24514 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24516 /* adj = op1 + adj */
24517 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24519 /* op0 = (imode)adj */
24520 expand_fix (op0, adj, 0);
24523 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24526 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24528 /* C code for the stuff we're doing below (for do_floor):
24530 xi -= (double)xi > op1 ? 1 : 0;
/* lfloor/lceil: truncate to integer, then compensate by +-1 when the
   truncation rounded the wrong way.  DO_FLOOR selects floor vs ceil.  */
24533 enum machine_mode fmode = GET_MODE (op1);
24534 enum machine_mode imode = GET_MODE (op0);
24535 rtx ireg, freg, label, tmp;
24537 /* reg = (long)op1 */
24538 ireg = gen_reg_rtx (imode);
24539 expand_fix (ireg, op1, 0);
24541 /* freg = (double)reg */
24542 freg = gen_reg_rtx (fmode);
24543 expand_float (freg, ireg, 0);
24545 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24546 label = ix86_expand_sse_compare_and_jump (UNLE,
24547 freg, op1, !do_floor);
24548 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24549 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24550 emit_move_insn (ireg, tmp);
24552 emit_label (label);
24553 LABEL_NUSES (label) = 1;
24555 emit_move_insn (op0, ireg);
24558 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24559 result in OPERAND0. */
24561 ix86_expand_rint (rtx operand0, rtx operand1)
24563 /* C code for the stuff we're doing below:
24564 xa = fabs (operand1);
24565 if (!isless (xa, 2**52))
24567 xa = xa + 2**52 - 2**52;
24568 return copysign (xa, operand1);
/* Adding and subtracting 2**52 (2**23 for SF) forces rounding to an
   integer in the current rounding mode; values >= 2**52 are already
   integral and are returned unchanged via the early exit.  */
24570 enum machine_mode mode = GET_MODE (operand0);
24571 rtx res, xa, label, TWO52, mask;
24573 res = gen_reg_rtx (mode);
24574 emit_move_insn (res, operand1);
24576 /* xa = abs (operand1) */
24577 xa = ix86_expand_sse_fabs (res, &mask);
24579 /* if (!isless (xa, TWO52)) goto label; */
24580 TWO52 = ix86_gen_TWO52 (mode);
24581 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24583 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24584 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (preserves signed zero).  */
24586 ix86_sse_copysign_to_positive (res, xa, res, mask);
24588 emit_label (label);
24589 LABEL_NUSES (label) = 1;
24591 emit_move_insn (operand0, res);
24594 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24597 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24599 /* C code for the stuff we expand below.
24600 double xa = fabs (x), x2;
24601 if (!isless (xa, TWO52))
24603 xa = xa + TWO52 - TWO52;
24604 x2 = copysign (xa, x);
/* 32-bit variant: avoids a DImode fix/float round-trip by using the
   TWO52 add/sub trick plus a mask-based +-1 compensation.  */
24613 enum machine_mode mode = GET_MODE (operand0);
24614 rtx xa, TWO52, tmp, label, one, res, mask;
24616 TWO52 = ix86_gen_TWO52 (mode);
24618 /* Temporary for holding the result, initialized to the input
24619 operand to ease control flow. */
24620 res = gen_reg_rtx (mode);
24621 emit_move_insn (res, operand1);
24623 /* xa = abs (operand1) */
24624 xa = ix86_expand_sse_fabs (res, &mask);
24626 /* if (!isless (xa, TWO52)) goto label; */
24627 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24629 /* xa = xa + TWO52 - TWO52; */
24630 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24631 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24633 /* xa = copysign (xa, operand1) */
24634 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24636 /* generate 1.0 or -1.0 */
24637 one = force_reg (mode,
24638 const_double_from_real_value (do_floor
24639 ? dconst1 : dconstm1, mode));
24641 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24642 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24643 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24644 gen_rtx_AND (mode, one, tmp)));
24645 /* We always need to subtract here to preserve signed zero. */
24646 tmp = expand_simple_binop (mode, MINUS,
24647 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24648 emit_move_insn (res, tmp);
24650 emit_label (label);
24651 LABEL_NUSES (label) = 1;
24653 emit_move_insn (operand0, res);
24656 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* NOTE(review): header comment continues on elided lines.  Unlike the
   "df_32" variant above, this one truncates through an integer register
   (expand_fix/expand_float below), which for DFmode uses DImode -- per the
   comment on ix86_expand_rounddf_32, that path needs cvttsd2siq and is
   64-bit only; confirm callers guard on TARGET_64BIT.  */
24659 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24661 /* C code for the stuff we expand below.
24662 double xa = fabs (x), x2;
24663 if (!isless (xa, TWO52))
24665 x2 = (double)(long)x;
24672 if (HONOR_SIGNED_ZEROS (mode))
24673 return copysign (x2, x);
24676 enum machine_mode mode = GET_MODE (operand0);
24677 rtx xa, xi, TWO52, tmp, label, one, res, mask;
24679 TWO52 = ix86_gen_TWO52 (mode);
24681 /* Temporary for holding the result, initialized to the input
24682 operand to ease control flow. */
24683 res = gen_reg_rtx (mode);
24684 emit_move_insn (res, operand1);
24686 /* xa = abs (operand1) */
24687 xa = ix86_expand_sse_fabs (res, &mask);
24689 /* if (!isless (xa, TWO52)) goto label; */
24690 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24692 /* xa = (double)(long)x */
/* Round RES to integer by a fix/float round trip; DFmode needs a DImode
   integer temporary, SFmode only SImode.  */
24693 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24694 expand_fix (xi, res, 0);
24695 expand_float (xa, xi, 0);
24698 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24700 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* Truncation rounds toward zero; add or subtract 1.0 where that differs
   from floor (negative inputs) or ceil (positive inputs).  */
24701 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24702 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24703 gen_rtx_AND (mode, one, tmp)));
24704 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
24705 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24706 emit_move_insn (res, tmp);
/* Restore the sign so -0.0 stays -0.0 when signed zeros matter.  */
24708 if (HONOR_SIGNED_ZEROS (mode))
24709 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24711 emit_label (label);
24712 LABEL_NUSES (label) = 1;
24714 emit_move_insn (operand0, res);
24717 /* Expand SSE sequence for computing round from OPERAND1 storing
24718 into OPERAND0. Sequence that works without relying on DImode truncation
24719 via cvttsd2siq that is only available on 64bit targets. */
24721 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
24723 /* C code for the stuff we expand below.
24724 double xa = fabs (x), xa2, x2;
24725 if (!isless (xa, TWO52))
24727 Using the absolute value and copying back sign makes
24728 -0.0 -> -0.0 correct.
24729 xa2 = xa + TWO52 - TWO52;
24734 else if (dxa > 0.5)
24736 x2 = copysign (xa2, x);
24739 enum machine_mode mode = GET_MODE (operand0);
24740 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
24742 TWO52 = ix86_gen_TWO52 (mode);
24744 /* Temporary for holding the result, initialized to the input
24745 operand to ease control flow. */
24746 res = gen_reg_rtx (mode);
24747 emit_move_insn (res, operand1);
24749 /* xa = abs (operand1) */
24750 xa = ix86_expand_sse_fabs (res, &mask);
24752 /* if (!isless (xa, TWO52)) goto label; */
24753 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24755 /* xa2 = xa + TWO52 - TWO52; */
24756 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24757 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
24759 /* dxa = xa2 - xa; */
/* DXA is the signed rounding error of the 2^52 trick; it selects whether
   the rounded-to-nearest-even result must be nudged by +/-1 to get
   round-half-away-from-zero semantics.  */
24760 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
24762 /* generate 0.5, 1.0 and -0.5 */
24763 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
24764 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
24765 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): this gen_reg_rtx result is immediately overwritten by the
   compare-mask call below -- looks like a dead store; safe to keep but a
   candidate for removal once verified against the full source.  */
24769 tmp = gen_reg_rtx (mode);
24770 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24771 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
24772 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24773 gen_rtx_AND (mode, one, tmp)));
24774 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24775 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24776 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
24777 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24778 gen_rtx_AND (mode, one, tmp)));
24779 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24781 /* res = copysign (xa2, operand1) */
24782 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
24784 emit_label (label);
24785 LABEL_NUSES (label) = 1;
24787 emit_move_insn (operand0, res);
24790 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* NOTE(review): header continues on elided lines.  Truncation is done via
   an integer fix/float round trip; for DFmode that needs a DImode
   truncation (64-bit only, per the rounddf_32 comment above) -- the
   "df_32" sibling below is the 32-bit-safe variant.  */
24793 ix86_expand_trunc (rtx operand0, rtx operand1)
24795 /* C code for SSE variant we expand below.
24796 double xa = fabs (x), x2;
24797 if (!isless (xa, TWO52))
24799 x2 = (double)(long)x;
24800 if (HONOR_SIGNED_ZEROS (mode))
24801 return copysign (x2, x);
24804 enum machine_mode mode = GET_MODE (operand0);
24805 rtx xa, xi, TWO52, label, res, mask;
24807 TWO52 = ix86_gen_TWO52 (mode);
24809 /* Temporary for holding the result, initialized to the input
24810 operand to ease control flow. */
24811 res = gen_reg_rtx (mode);
24812 emit_move_insn (res, operand1);
24814 /* xa = abs (operand1) */
24815 xa = ix86_expand_sse_fabs (res, &mask);
24817 /* if (!isless (xa, TWO52)) goto label; */
/* |x| >= 2^52 (or NaN) is already integral -- leave RES untouched.  */
24818 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24820 /* x = (double)(long)x */
24821 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24822 expand_fix (xi, res, 0);
24823 expand_float (res, xi, 0);
/* The fix/float trip loses the sign of -0.0; copy it back when the
   target honors signed zeros.  */
24825 if (HONOR_SIGNED_ZEROS (mode))
24826 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24828 emit_label (label);
24829 LABEL_NUSES (label) = 1;
24831 emit_move_insn (operand0, res);
24834 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* NOTE(review): header continues on elided lines; this is the variant that
   avoids DImode truncation (usable on 32-bit targets), using the 2^52
   add/sub trick plus a compensation step instead of fix/float.  */
24837 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
24839 enum machine_mode mode = GET_MODE (operand0);
24840 rtx xa, mask, TWO52, label, one, res, smask, tmp;
24842 /* C code for SSE variant we expand below.
24843 double xa = fabs (x), x2;
24844 if (!isless (xa, TWO52))
24846 xa2 = xa + TWO52 - TWO52;
24850 x2 = copysign (xa2, x);
24854 TWO52 = ix86_gen_TWO52 (mode);
24856 /* Temporary for holding the result, initialized to the input
24857 operand to ease control flow. */
24858 res = gen_reg_rtx (mode);
24859 emit_move_insn (res, operand1);
24861 /* xa = abs (operand1) */
/* SMASK holds the sign-bit mask produced by the fabs expansion; it is
   reused for the final copysign below.  */
24862 xa = ix86_expand_sse_fabs (res, &smask);
24864 /* if (!isless (xa, TWO52)) goto label; */
24865 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24867 /* res = xa + TWO52 - TWO52; */
24868 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24869 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
24870 emit_move_insn (res, tmp);
24873 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24875 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* The 2^52 trick rounds to nearest; subtract 1.0 whenever it rounded the
   magnitude up, so the net effect is truncation toward zero.  */
24876 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
24877 emit_insn (gen_rtx_SET (VOIDmode, mask,
24878 gen_rtx_AND (mode, mask, one)));
24879 tmp = expand_simple_binop (mode, MINUS,
24880 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
24881 emit_move_insn (res, tmp);
24883 /* res = copysign (res, operand1) */
24884 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
24886 emit_label (label);
24887 LABEL_NUSES (label) = 1;
24889 emit_move_insn (operand0, res);
24892 /* Expand SSE sequence for computing round from OPERAND1 storing
/* NOTE(review): header continues on elided lines.  This variant rounds by
   adding nextafter(0.5, 0.0) and truncating via fix/float; for DFmode the
   DImode truncation implies a 64-bit target (cf. the rounddf_32 comment
   above) -- confirm the caller guards.  */
24895 ix86_expand_round (rtx operand0, rtx operand1)
24897 /* C code for the stuff we're doing below:
24898 double xa = fabs (x);
24899 if (!isless (xa, TWO52))
24901 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24902 return copysign (xa, x);
24904 enum machine_mode mode = GET_MODE (operand0);
24905 rtx res, TWO52, xa, label, xi, half, mask;
24906 const struct real_format *fmt;
24907 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24909 /* Temporary for holding the result, initialized to the input
24910 operand to ease control flow. */
24911 res = gen_reg_rtx (mode);
24912 emit_move_insn (res, operand1);
24914 TWO52 = ix86_gen_TWO52 (mode);
24915 xa = ix86_expand_sse_fabs (res, &mask);
24916 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24918 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2^(-p-1), i.e. the largest representable value below
   0.5 (p = precision of the mode).  Using it instead of exactly 0.5 keeps
   values like 0.49999... from rounding up to 1.  */
24919 fmt = REAL_MODE_FORMAT (mode);
24920 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24921 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24923 /* xa = xa + 0.5 */
24924 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
24925 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
24927 /* xa = (double)(int64_t)xa */
24928 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24929 expand_fix (xi, xa, 0);
24930 expand_float (xa, xi, 0);
24932 /* res = copysign (xa, operand1) */
/* Working on |x| and restoring the sign keeps -0.0 -> -0.0 correct.  */
24933 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
24935 emit_label (label);
24936 LABEL_NUSES (label) = 1;
24938 emit_move_insn (operand0, res);
24942 /* Validate whether a SSE5 instruction is valid or not.
24943 OPERANDS is the array of operands.
24944 NUM is the number of operands.
24945 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
24946 NUM_MEMORY is the maximum number of memory operands to accept. */
24948 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
24954 /* Count the number of memory arguments */
/* MEM_MASK gets bit i set when operand i is a memory operand; MEM_COUNT
   is presumably incremented on elided lines of this loop -- confirm.  */
24957 for (i = 0; i < num; i++)
24959 enum machine_mode mode = GET_MODE (operands[i]);
24960 if (register_operand (operands[i], mode))
24963 else if (memory_operand (operands[i], mode))
24965 mem_mask |= (1 << i);
24971 rtx pattern = PATTERN (insn);
24973 /* allow 0 for pcmov */
/* A non-register, non-memory operand is only acceptable as the constant
   zero in a conditional-move (IF_THEN_ELSE) pattern.  */
24974 if (GET_CODE (pattern) != SET
24975 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
24977 || operands[i] != CONST0_RTX (mode))
24982 /* If there were no memory operations, allow the insn */
24986 /* Do not allow the destination register to be a memory operand. */
24987 else if (mem_mask & (1 << 0))
24990 /* If there are too many memory operations, disallow the instruction. While
24991 the hardware only allows 1 memory reference, before register allocation
24992 for some insns, we allow two memory operations sometimes in order to allow
24993 code like the following to be optimized:
24995 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24997 or similar cases that are vectorized into using the fmaddss
24999 else if (mem_count > num_memory)
25002 /* Don't allow more than one memory operation if not optimizing. */
25003 else if (mem_count > 1 && !optimize)
25006 else if (num == 4 && mem_count == 1)
25008 /* formats (destination is the first argument), example fmaddss:
25009 xmm1, xmm1, xmm2, xmm3/mem
25010 xmm1, xmm1, xmm2/mem, xmm3
25011 xmm1, xmm2, xmm3/mem, xmm1
25012 xmm1, xmm2/mem, xmm3, xmm1 */
/* With OC0 any single source operand (1..3) may be the memory ref.  */
25014 return ((mem_mask == (1 << 1))
25015 || (mem_mask == (1 << 2))
25016 || (mem_mask == (1 << 3)));
25018 /* format, example pmacsdd:
25019 xmm1, xmm2, xmm3/mem, xmm1 */
/* Without OC0 only operand 2 may be the memory ref.  */
25021 return (mem_mask == (1 << 2));
25024 else if (num == 4 && num_memory == 2)
25026 /* If there are two memory operations, we can load one of the memory ops
25027 into the destination register. This is for optimizing the
25028 multiply/add ops, which the combiner has optimized both the multiply
25029 and the add insns to have a memory operation. We have to be careful
25030 that the destination doesn't overlap with the inputs. */
25031 rtx op0 = operands[0];
25033 if (reg_mentioned_p (op0, operands[1])
25034 || reg_mentioned_p (op0, operands[2])
25035 || reg_mentioned_p (op0, operands[3]))
25038 /* formats (destination is the first argument), example fmaddss:
25039 xmm1, xmm1, xmm2, xmm3/mem
25040 xmm1, xmm1, xmm2/mem, xmm3
25041 xmm1, xmm2, xmm3/mem, xmm1
25042 xmm1, xmm2/mem, xmm3, xmm1
25044 For the oc0 case, we will load either operands[1] or operands[3] into
25045 operands[0], so any combination of 2 memory operands is ok. */
25049 /* format, example pmacsdd:
25050 xmm1, xmm2, xmm3/mem, xmm1
25052 For the integer multiply/add instructions be more restrictive and
25053 require operands[2] and operands[3] to be the memory operands. */
25055 return (mem_mask == ((1 << 2) | (1 << 3)));
25058 else if (num == 3 && num_memory == 1)
25060 /* formats, example protb:
25061 xmm1, xmm2, xmm3/mem
25062 xmm1, xmm2/mem, xmm3 */
25064 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25066 /* format, example comeq:
25067 xmm1, xmm2, xmm3/mem */
25069 return (mem_mask == (1 << 2));
/* Every valid NUM/NUM_MEMORY combination is handled above.  */
25073 gcc_unreachable ();
25079 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25080 hardware will allow by using the destination register to load one of the
25081 memory operations. Presently this is used by the multiply/add routines to
25082 allow 2 memory references. */
25085 ix86_expand_sse5_multiple_memory (rtx operands[],
25087 enum machine_mode mode)
25089 rtx op0 = operands[0];
/* Precondition checks: the destination must be a register that does not
   overlap any input (a condition ix86_sse5_valid_op_p already enforced for
   the 2-memory case); otherwise loading into it would clobber an input.  */
25091 || memory_operand (op0, mode)
25092 || reg_mentioned_p (op0, operands[1])
25093 || reg_mentioned_p (op0, operands[2])
25094 || reg_mentioned_p (op0, operands[3]))
25095 gcc_unreachable ();
25097 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25098 the destination register. */
25099 if (memory_operand (operands[1], mode))
25101 emit_move_insn (op0, operands[1]);
/* NOTE(review): the elided line after each emit_move_insn presumably
   rewrites the moved operand slot to OP0 -- confirm in the full source.  */
25104 else if (memory_operand (operands[3], mode))
25106 emit_move_insn (op0, operands[3]);
/* Callers guarantee one of operands[1]/operands[3] is a memory ref.  */
25110 gcc_unreachable ();
25116 /* Table of valid machine attributes. */
25117 static const struct attribute_spec ix86_attribute_table[] =
25119 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25120 /* Stdcall attribute says callee is responsible for popping arguments
25121 if they are not variable. */
25122 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25123 /* Fastcall attribute says callee is responsible for popping arguments
25124 if they are not variable. */
25125 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25126 /* Cdecl attribute says the callee is a normal C declaration */
25127 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25128 /* Regparm attribute specifies how many integer arguments are to be
25129 passed in registers. */
25130 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25131 /* Sseregparm attribute says we are using x86_64 calling conventions
25132 for FP arguments. */
25133 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25134 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* The attribute name is taken from a runtime string so it can be shared
   with the -mstackrealign option machinery.  */
25135 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25136 false, true, true, ix86_handle_cconv_attribute },
/* Windows-only (dllimport/dllexport/shared) attributes.  */
25137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25138 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25139 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25140 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25142 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25143 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25144 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25145 SUBTARGET_ATTRIBUTE_TABLE,
/* Sentinel entry terminating the table.  */
25147 { NULL, 0, 0, false, false, false, NULL }
25150 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25152 x86_builtin_vectorization_cost (bool runtime_test)
25154 /* If the branch of the runtime test is taken - i.e. - the vectorized
25155 version is skipped - this incurs a misprediction cost (because the
25156 vectorized version is expected to be the fall-through). So we subtract
25157 the latency of a mispredicted branch from the costs that are incured
25158 when the vectorized version is executed.
25160 TODO: The values in individual target tables have to be tuned or new
25161 fields may be needed. For eg. on K8, the default branch path is the
25162 not-taken path. If the taken path is predicted correctly, the minimum
25163 penalty of going down the taken-path is 1 cycle. If the taken-path is
25164 not predicted correctly, then the minimum penalty is 10 cycles. */
/* NOTE(review): elided lines presumably gate this on RUNTIME_TEST and
   return 0 otherwise -- confirm.  The negative value credits back the
   mispredicted-branch latency described above.  */
25168 return (-(ix86_cost->cond_taken_branch_cost));
25174 /* Initialize the GCC target structure. */
/* Each hook below overrides the default in target-def.h; TARGET_INITIALIZER
   at the end of the file collects them into the `targetm' vector.  */
25175 #undef TARGET_ATTRIBUTE_TABLE
25176 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25177 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25178 # undef TARGET_MERGE_DECL_ATTRIBUTES
25179 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25182 #undef TARGET_COMP_TYPE_ATTRIBUTES
25183 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin and vectorizer hooks.  */
25185 #undef TARGET_INIT_BUILTINS
25186 #define TARGET_INIT_BUILTINS ix86_init_builtins
25187 #undef TARGET_EXPAND_BUILTIN
25188 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25190 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25191 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25192 ix86_builtin_vectorized_function
25194 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25195 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25197 #undef TARGET_BUILTIN_RECIPROCAL
25198 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
/* Assembler output hooks.  */
25200 #undef TARGET_ASM_FUNCTION_EPILOGUE
25201 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25203 #undef TARGET_ENCODE_SECTION_INFO
25204 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25205 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25207 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25210 #undef TARGET_ASM_OPEN_PAREN
25211 #define TARGET_ASM_OPEN_PAREN ""
25212 #undef TARGET_ASM_CLOSE_PAREN
25213 #define TARGET_ASM_CLOSE_PAREN ""
25215 #undef TARGET_ASM_ALIGNED_HI_OP
25216 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25217 #undef TARGET_ASM_ALIGNED_SI_OP
25218 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25220 #undef TARGET_ASM_ALIGNED_DI_OP
25221 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 has no alignment restrictions on data, so unaligned directives are
   the same as the aligned ones.  */
25224 #undef TARGET_ASM_UNALIGNED_HI_OP
25225 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25226 #undef TARGET_ASM_UNALIGNED_SI_OP
25227 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25228 #undef TARGET_ASM_UNALIGNED_DI_OP
25229 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Scheduler hooks.  */
25231 #undef TARGET_SCHED_ADJUST_COST
25232 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25233 #undef TARGET_SCHED_ISSUE_RATE
25234 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25235 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25236 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25237 ia32_multipass_dfa_lookahead
25239 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25240 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25243 #undef TARGET_HAVE_TLS
25244 #define TARGET_HAVE_TLS true
25246 #undef TARGET_CANNOT_FORCE_CONST_MEM
25247 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25248 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25249 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25251 #undef TARGET_DELEGITIMIZE_ADDRESS
25252 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25254 #undef TARGET_MS_BITFIELD_LAYOUT_P
25255 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* Subtarget overrides: Darwin, then PE/COFF (the latter wins when both
   conditionals are active, since it is defined later).  */
25258 #undef TARGET_BINDS_LOCAL_P
25259 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25261 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25262 #undef TARGET_BINDS_LOCAL_P
25263 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25266 #undef TARGET_ASM_OUTPUT_MI_THUNK
25267 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25268 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25269 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25271 #undef TARGET_ASM_FILE_START
25272 #define TARGET_ASM_FILE_START x86_file_start
25274 #undef TARGET_DEFAULT_TARGET_FLAGS
25275 #define TARGET_DEFAULT_TARGET_FLAGS \
25277 | TARGET_SUBTARGET_DEFAULT \
25278 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25280 #undef TARGET_HANDLE_OPTION
25281 #define TARGET_HANDLE_OPTION ix86_handle_option
/* Cost-model hooks.  */
25283 #undef TARGET_RTX_COSTS
25284 #define TARGET_RTX_COSTS ix86_rtx_costs
25285 #undef TARGET_ADDRESS_COST
25286 #define TARGET_ADDRESS_COST ix86_address_cost
25288 #undef TARGET_FIXED_CONDITION_CODE_REGS
25289 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25290 #undef TARGET_CC_MODES_COMPATIBLE
25291 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25293 #undef TARGET_MACHINE_DEPENDENT_REORG
25294 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
/* Calling-convention and varargs hooks.  */
25296 #undef TARGET_BUILD_BUILTIN_VA_LIST
25297 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25299 #undef TARGET_EXPAND_BUILTIN_VA_START
25300 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25302 #undef TARGET_MD_ASM_CLOBBERS
25303 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25305 #undef TARGET_PROMOTE_PROTOTYPES
25306 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25307 #undef TARGET_STRUCT_VALUE_RTX
25308 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25309 #undef TARGET_SETUP_INCOMING_VARARGS
25310 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25311 #undef TARGET_MUST_PASS_IN_STACK
25312 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25313 #undef TARGET_PASS_BY_REFERENCE
25314 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25315 #undef TARGET_INTERNAL_ARG_POINTER
25316 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25317 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25318 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25319 #undef TARGET_STRICT_ARGUMENT_NAMING
25320 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25322 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25323 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25325 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25326 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25328 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25329 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25331 #undef TARGET_C_MODE_FOR_SUFFIX
25332 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25335 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25336 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25339 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25340 #undef TARGET_INSERT_ATTRIBUTES
25341 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25344 #undef TARGET_MANGLE_TYPE
25345 #define TARGET_MANGLE_TYPE ix86_mangle_type
25347 #undef TARGET_STACK_PROTECT_FAIL
25348 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25350 #undef TARGET_FUNCTION_VALUE
25351 #define TARGET_FUNCTION_VALUE ix86_function_value
25353 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25354 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Materialize the hook vector using everything defined above.  */
25356 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
25358 #include "gt-i386.h"