1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
53 #include "tm-constrs.h"
56 static int x86_builtin_vectorization_cost (bool);
/* CHECK_STACK_LIMIT: threshold (in bytes) below which stack probing is
   not needed; -1 means "no limit" unless a target header overrides it.
   Fix: the #ifndef had no matching #endif in this excerpt.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything
   else -> 4 ("other", e.g. TImode).
   Fix: the conditional expression was truncated -- the final
   ": 4)" arm restoring the "other" index was missing.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy entry: used for the (32-bit or 64-bit)
   slot of a stringop_algs pair that is never consulted for a given
   processor; unconditionally falls back to a library call.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size rather than speed: every
   entry is a byte count (COSTS_N_BYTES) or a small relative weight, so
   the RTX cost machinery compares code bytes instead of cycles.
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  /* memcpy and memset strategies: a 1-byte rep prefix is smallest.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
146 /* Processor costs (relative to an add) */
/* Cycle-count cost table for the original Intel 386 (COSTS_N_INSNS
   units, relative to an add).
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cycle-count cost table for the Intel 486.
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cycle-count cost table for the Intel Pentium (P5).
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cycle-count cost table for the Intel PentiumPro / P6 family.
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache  */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
     the alignment).  For small blocks inline loop is still a noticeable win, for bigger
     blocks either rep movsl or rep movsb is way to go.  Rep movsb has apparently
     more expensive startup time in CPU, but after 4K the difference is down in the
     noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cycle-count cost table for the AMD Geode.
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cycle-count cost table for the AMD K6.
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cycle-count cost table for the AMD Athlon (K7).
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8. Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cycle-count cost table for the AMD K8 (Opteron/Athlon 64).
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very small
     blocks it is better to use loop.  For large blocks, libcall can do
     nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
/* Cycle-count cost table for AMD Family 10h (Barcelona).
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  /* Latency table for moves between SSE and integer units
     (NOTE(review): the opening of this comment was lost in this
     excerpt; latencies as listed in the original source):
	MOVD reg64, xmmreg	Double	FSTORE	4
	MOVD reg32, xmmreg	Double	FSTORE	4
	MOVD reg64, xmmreg	Double	FADD	3
	MOVD reg32, xmmreg	Double	FADD	3.  */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cycle-count cost table for the Intel Pentium 4 (NetBurst).
   NOTE(review): some interior fields and the closing "};" are not
   visible in this excerpt -- confirm against the complete file.  */
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  /* NOTE(review): the memset table below appears truncated in this
     excerpt -- a final "{-1, libcall}}}," entry line is missing,
     leaving the initializer unbalanced; confirm against the full file.  */
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cost table for Intel Nocona (64-bit Pentium 4 / Prescott class).
   Positional initializer of struct processor_costs; see the inline
   comments for slot meanings.
   NOTE(review): several lines appear to have been dropped from this
   copy (MOVE_RATIO slot, branch-cost slot, part of the memset table,
   and the closing brace) -- verify against struct processor_costs
   before editing.  */
891 struct processor_costs nocona_cost = {
892 COSTS_N_INSNS (1), /* cost of an add instruction */
893 COSTS_N_INSNS (1), /* cost of a lea instruction */
894 COSTS_N_INSNS (1), /* variable shift costs */
895 COSTS_N_INSNS (1), /* constant shift costs */
896 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
897 COSTS_N_INSNS (10), /* HI */
898 COSTS_N_INSNS (10), /* SI */
899 COSTS_N_INSNS (10), /* DI */
900 COSTS_N_INSNS (10)}, /* other */
901 0, /* cost of multiply per each bit set */
902 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
903 COSTS_N_INSNS (66), /* HI */
904 COSTS_N_INSNS (66), /* SI */
905 COSTS_N_INSNS (66), /* DI */
906 COSTS_N_INSNS (66)}, /* other */
907 COSTS_N_INSNS (1), /* cost of movsx */
908 COSTS_N_INSNS (1), /* cost of movzx */
909 16, /* "large" insn */
/* NOTE(review): MOVE_RATIO slot appears to be missing here -- confirm.  */
911 4, /* cost for loading QImode using movzbl */
912 {4, 4, 4}, /* cost of loading integer registers
913 in QImode, HImode and SImode.
914 Relative to reg-reg move (2). */
915 {4, 4, 4}, /* cost of storing integer registers */
916 3, /* cost of reg,reg fld/fst */
917 {12, 12, 12}, /* cost of loading fp registers
918 in SFmode, DFmode and XFmode */
919 {4, 4, 4}, /* cost of storing fp registers
920 in SFmode, DFmode and XFmode */
921 6, /* cost of moving MMX register */
922 {12, 12}, /* cost of loading MMX registers
923 in SImode and DImode */
924 {12, 12}, /* cost of storing MMX registers
925 in SImode and DImode */
926 6, /* cost of moving SSE register */
927 {12, 12, 12}, /* cost of loading SSE registers
928 in SImode, DImode and TImode */
929 {12, 12, 12}, /* cost of storing SSE registers
930 in SImode, DImode and TImode */
931 8, /* MMX or SSE register to integer */
932 8, /* size of l1 cache. */
933 1024, /* size of l2 cache. */
934 128, /* size of prefetch block */
935 8, /* number of parallel prefetches */
/* NOTE(review): branch-cost slot appears to be missing here -- confirm.  */
937 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
938 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
939 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
940 COSTS_N_INSNS (3), /* cost of FABS instruction. */
941 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
942 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy stringop table ({max size, algorithm} pairs, -1 = no limit);
   second row is the 64-bit variant.  */
943 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
944 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
945 {100000, unrolled_loop}, {-1, libcall}}}},
/* memset stringop table; NOTE(review): the tail of the 32-bit row
   looks truncated in this copy -- confirm.  */
946 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
948 {libcall, {{24, loop}, {64, unrolled_loop},
949 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model slots.  */
950 1, /* scalar_stmt_cost. */
951 1, /* scalar load_cost. */
952 1, /* scalar_store_cost. */
953 1, /* vec_stmt_cost. */
954 1, /* vec_to_scalar_cost. */
955 1, /* scalar_to_vec_cost. */
956 1, /* vec_align_load_cost. */
957 2, /* vec_unalign_load_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
960 1, /* cond_not_taken_branch_cost. */
/* Cost table for Intel Core 2.  Positional initializer of struct
   processor_costs; see inline comments for slot meanings.
   NOTE(review): the MOVE_RATIO slot and the closing brace appear to be
   missing from this copy -- verify against struct processor_costs
   before editing.  */
964 struct processor_costs core2_cost = {
965 COSTS_N_INSNS (1), /* cost of an add instruction */
966 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
967 COSTS_N_INSNS (1), /* variable shift costs */
968 COSTS_N_INSNS (1), /* constant shift costs */
969 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
970 COSTS_N_INSNS (3), /* HI */
971 COSTS_N_INSNS (3), /* SI */
972 COSTS_N_INSNS (3), /* DI */
973 COSTS_N_INSNS (3)}, /* other */
974 0, /* cost of multiply per each bit set */
975 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
976 COSTS_N_INSNS (22), /* HI */
977 COSTS_N_INSNS (22), /* SI */
978 COSTS_N_INSNS (22), /* DI */
979 COSTS_N_INSNS (22)}, /* other */
980 COSTS_N_INSNS (1), /* cost of movsx */
981 COSTS_N_INSNS (1), /* cost of movzx */
982 8, /* "large" insn */
/* NOTE(review): MOVE_RATIO slot appears to be missing here -- confirm.  */
984 2, /* cost for loading QImode using movzbl */
985 {6, 6, 6}, /* cost of loading integer registers
986 in QImode, HImode and SImode.
987 Relative to reg-reg move (2). */
988 {4, 4, 4}, /* cost of storing integer registers */
989 2, /* cost of reg,reg fld/fst */
990 {6, 6, 6}, /* cost of loading fp registers
991 in SFmode, DFmode and XFmode */
992 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
993 2, /* cost of moving MMX register */
994 {6, 6}, /* cost of loading MMX registers
995 in SImode and DImode */
996 {4, 4}, /* cost of storing MMX registers
997 in SImode and DImode */
998 2, /* cost of moving SSE register */
999 {6, 6, 6}, /* cost of loading SSE registers
1000 in SImode, DImode and TImode */
1001 {4, 4, 4}, /* cost of storing SSE registers
1002 in SImode, DImode and TImode */
1003 2, /* MMX or SSE register to integer */
1004 32, /* size of l1 cache. */
1005 2048, /* size of l2 cache. */
1006 128, /* size of prefetch block */
1007 8, /* number of parallel prefetches */
1008 3, /* Branch cost */
1009 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1010 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1011 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1012 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1013 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1014 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy stringop table ({max size, algorithm} pairs, -1 = no limit);
   second row is the 64-bit variant.  */
1015 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1016 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1017 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* memset stringop table.  */
1018 {{libcall, {{8, loop}, {15, unrolled_loop},
1019 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1020 {libcall, {{24, loop}, {32, unrolled_loop},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model slots.  */
1022 1, /* scalar_stmt_cost. */
1023 1, /* scalar load_cost. */
1024 1, /* scalar_store_cost. */
1025 1, /* vec_stmt_cost. */
1026 1, /* vec_to_scalar_cost. */
1027 1, /* scalar_to_vec_cost. */
1028 1, /* vec_align_load_cost. */
1029 2, /* vec_unalign_load_cost. */
1030 1, /* vec_store_cost. */
1031 3, /* cond_taken_branch_cost. */
1032 1, /* cond_not_taken_branch_cost. */
1035 /* Generic64 should produce code tuned for Nocona and K8. */
/* Positional initializer of struct processor_costs; slot meanings are
   given by the inline comments.  NOTE(review): the closing brace of the
   initializer appears to be missing from this copy -- confirm.  */
1037 struct processor_costs generic64_cost = {
1038 COSTS_N_INSNS (1), /* cost of an add instruction */
1039 /* On all chips taken into consideration lea is 2 cycles and more. With
1040 this cost however our current implementation of synth_mult results in
1041 use of unnecessary temporary registers causing regression on several
1042 SPECfp benchmarks. */
1043 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1044 COSTS_N_INSNS (1), /* variable shift costs */
1045 COSTS_N_INSNS (1), /* constant shift costs */
1046 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1047 COSTS_N_INSNS (4), /* HI */
1048 COSTS_N_INSNS (3), /* SI */
1049 COSTS_N_INSNS (4), /* DI */
1050 COSTS_N_INSNS (2)}, /* other */
1051 0, /* cost of multiply per each bit set */
1052 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1053 COSTS_N_INSNS (26), /* HI */
1054 COSTS_N_INSNS (42), /* SI */
1055 COSTS_N_INSNS (74), /* DI */
1056 COSTS_N_INSNS (74)}, /* other */
1057 COSTS_N_INSNS (1), /* cost of movsx */
1058 COSTS_N_INSNS (1), /* cost of movzx */
1059 8, /* "large" insn */
1060 17, /* MOVE_RATIO */
1061 4, /* cost for loading QImode using movzbl */
1062 {4, 4, 4}, /* cost of loading integer registers
1063 in QImode, HImode and SImode.
1064 Relative to reg-reg move (2). */
1065 {4, 4, 4}, /* cost of storing integer registers */
1066 4, /* cost of reg,reg fld/fst */
1067 {12, 12, 12}, /* cost of loading fp registers
1068 in SFmode, DFmode and XFmode */
1069 {6, 6, 8}, /* cost of storing fp registers
1070 in SFmode, DFmode and XFmode */
1071 2, /* cost of moving MMX register */
1072 {8, 8}, /* cost of loading MMX registers
1073 in SImode and DImode */
1074 {8, 8}, /* cost of storing MMX registers
1075 in SImode and DImode */
1076 2, /* cost of moving SSE register */
1077 {8, 8, 8}, /* cost of loading SSE registers
1078 in SImode, DImode and TImode */
1079 {8, 8, 8}, /* cost of storing SSE registers
1080 in SImode, DImode and TImode */
1081 5, /* MMX or SSE register to integer */
1082 32, /* size of l1 cache. */
1083 512, /* size of l2 cache. */
1084 64, /* size of prefetch block */
1085 6, /* number of parallel prefetches */
1086 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1087 is increased to perhaps more appropriate value of 5. */
1088 3, /* Branch cost */
1089 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1090 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1091 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1092 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1093 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1094 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy/memset stringop tables: 32-bit rows are dummies (generic64 is
   64-bit only); 64-bit rows choose loop then rep prefix then libcall.  */
1095 {DUMMY_STRINGOP_ALGS,
1096 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model slots.  */
1099 1, /* scalar_stmt_cost. */
1100 1, /* scalar load_cost. */
1101 1, /* scalar_store_cost. */
1102 1, /* vec_stmt_cost. */
1103 1, /* vec_to_scalar_cost. */
1104 1, /* scalar_to_vec_cost. */
1105 1, /* vec_align_load_cost. */
1106 2, /* vec_unalign_load_cost. */
1107 1, /* vec_store_cost. */
1108 3, /* cond_taken_branch_cost. */
1109 1, /* cond_not_taken_branch_cost. */
1112 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Positional initializer of struct processor_costs; identical layout to
   generic64_cost above except for the l2 cache size and the stringop
   tables (here the 32-bit rows are real and the 64-bit rows are
   dummies).  NOTE(review): the closing brace appears to be missing from
   this copy -- confirm.  */
1114 struct processor_costs generic32_cost = {
1115 COSTS_N_INSNS (1), /* cost of an add instruction */
1116 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1117 COSTS_N_INSNS (1), /* variable shift costs */
1118 COSTS_N_INSNS (1), /* constant shift costs */
1119 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1120 COSTS_N_INSNS (4), /* HI */
1121 COSTS_N_INSNS (3), /* SI */
1122 COSTS_N_INSNS (4), /* DI */
1123 COSTS_N_INSNS (2)}, /* other */
1124 0, /* cost of multiply per each bit set */
1125 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1126 COSTS_N_INSNS (26), /* HI */
1127 COSTS_N_INSNS (42), /* SI */
1128 COSTS_N_INSNS (74), /* DI */
1129 COSTS_N_INSNS (74)}, /* other */
1130 COSTS_N_INSNS (1), /* cost of movsx */
1131 COSTS_N_INSNS (1), /* cost of movzx */
1132 8, /* "large" insn */
1133 17, /* MOVE_RATIO */
1134 4, /* cost for loading QImode using movzbl */
1135 {4, 4, 4}, /* cost of loading integer registers
1136 in QImode, HImode and SImode.
1137 Relative to reg-reg move (2). */
1138 {4, 4, 4}, /* cost of storing integer registers */
1139 4, /* cost of reg,reg fld/fst */
1140 {12, 12, 12}, /* cost of loading fp registers
1141 in SFmode, DFmode and XFmode */
1142 {6, 6, 8}, /* cost of storing fp registers
1143 in SFmode, DFmode and XFmode */
1144 2, /* cost of moving MMX register */
1145 {8, 8}, /* cost of loading MMX registers
1146 in SImode and DImode */
1147 {8, 8}, /* cost of storing MMX registers
1148 in SImode and DImode */
1149 2, /* cost of moving SSE register */
1150 {8, 8, 8}, /* cost of loading SSE registers
1151 in SImode, DImode and TImode */
1152 {8, 8, 8}, /* cost of storing SSE registers
1153 in SImode, DImode and TImode */
1154 5, /* MMX or SSE register to integer */
1155 32, /* size of l1 cache. */
1156 256, /* size of l2 cache. */
1157 64, /* size of prefetch block */
1158 6, /* number of parallel prefetches */
1159 3, /* Branch cost */
1160 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1161 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1162 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1163 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1164 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1165 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1166 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1167 DUMMY_STRINGOP_ALGS},
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
/* Vectorizer cost model slots.  */
1170 1, /* scalar_stmt_cost. */
1171 1, /* scalar load_cost. */
1172 1, /* scalar_store_cost. */
1173 1, /* vec_stmt_cost. */
1174 1, /* vec_to_scalar_cost. */
1175 1, /* scalar_to_vec_cost. */
1176 1, /* vec_align_load_cost. */
1177 2, /* vec_unalign_load_cost. */
1178 1, /* vec_store_cost. */
1179 3, /* cond_taken_branch_cost. */
1180 1, /* cond_not_taken_branch_cost. */
/* Pointer to the cost table in effect for the current tuning; defaults
   to pentium_cost and is presumably reassigned during option
   processing -- TODO confirm where it is set.  */
1183 const struct processor_costs *ix86_cost = &pentium_cost;
1185 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; the m_* masks below are OR-ed
   together to say which CPUs a tuning/arch feature applies to.  */
1186 #define m_386 (1<<PROCESSOR_I386)
1187 #define m_486 (1<<PROCESSOR_I486)
1188 #define m_PENT (1<<PROCESSOR_PENTIUM)
1189 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1190 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1191 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1192 #define m_CORE2 (1<<PROCESSOR_CORE2)
1194 #define m_GEODE (1<<PROCESSOR_GEODE)
1195 #define m_K6 (1<<PROCESSOR_K6)
1196 #define m_K6_GEODE (m_K6 | m_GEODE)
1197 #define m_K8 (1<<PROCESSOR_K8)
1198 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1199 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1200 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1201 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1203 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1204 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1206 /* Generic instruction choice should be common subset of supported CPUs
1207 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1208 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1210 /* Feature tests against the various tunings. */
/* Each slot corresponds (positionally) to an X86_TUNE_* enumerator and
   holds the OR of m_* masks for the CPUs where the tuning applies.
   NOTE(review): in this copy of the file a number of the mask-value
   lines appear to have been dropped (several X86_TUNE_* comments below
   are immediately followed by the next comment with no value line, and
   the closing brace of the array is absent) -- the array must be
   verified against the X86_TUNE_* enum before any positional edit.  */
1211 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1212 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1213 negatively, so enabling for Generic64 seems like good code size
1214 tradeoff. We can't enable it for 32bit generic because it does not
1215 work well with PPro base chips. */
1216 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1218 /* X86_TUNE_PUSH_MEMORY */
1219 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1220 | m_NOCONA | m_CORE2 | m_GENERIC,
1222 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1225 /* X86_TUNE_USE_BIT_TEST */
1228 /* X86_TUNE_UNROLL_STRLEN */
1229 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1231 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1232 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1234 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1235 on simulation result. But after P4 was made, no performance benefit
1236 was observed with branch hints. It also increases the code size.
1237 As a result, icc never generates branch hints. */
1240 /* X86_TUNE_DOUBLE_WITH_ADD */
1243 /* X86_TUNE_USE_SAHF */
1244 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1245 | m_NOCONA | m_CORE2 | m_GENERIC,
1247 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1248 partial dependencies. */
1249 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1250 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1252 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1253 register stalls on Generic32 compilation setting as well. However
1254 in current implementation the partial register stalls are not eliminated
1255 very well - they can be introduced via subregs synthesized by combine
1256 and can happen in caller/callee saving sequences. Because this option
1257 pays back little on PPro based chips and is in conflict with partial reg
1258 dependencies used by Athlon/P4 based chips, it is better to leave it off
1259 for generic32 for now. */
1262 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1263 m_CORE2 | m_GENERIC,
1265 /* X86_TUNE_USE_HIMODE_FIOP */
1266 m_386 | m_486 | m_K6_GEODE,
1268 /* X86_TUNE_USE_SIMODE_FIOP */
1269 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1271 /* X86_TUNE_USE_MOV0 */
1274 /* X86_TUNE_USE_CLTD */
1275 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1277 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1280 /* X86_TUNE_SPLIT_LONG_MOVES */
1283 /* X86_TUNE_READ_MODIFY_WRITE */
1286 /* X86_TUNE_READ_MODIFY */
1289 /* X86_TUNE_PROMOTE_QIMODE */
1290 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1291 | m_GENERIC /* | m_PENT4 ? */,
1293 /* X86_TUNE_FAST_PREFIX */
1294 ~(m_PENT | m_486 | m_386),
1296 /* X86_TUNE_SINGLE_STRINGOP */
1297 m_386 | m_PENT4 | m_NOCONA,
1299 /* X86_TUNE_QIMODE_MATH */
1302 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1303 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1304 might be considered for Generic32 if our scheme for avoiding partial
1305 stalls was more effective. */
1308 /* X86_TUNE_PROMOTE_QI_REGS */
1311 /* X86_TUNE_PROMOTE_HI_REGS */
1314 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1315 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1317 /* X86_TUNE_ADD_ESP_8 */
1318 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1319 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1321 /* X86_TUNE_SUB_ESP_4 */
1322 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1324 /* X86_TUNE_SUB_ESP_8 */
1325 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1326 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1328 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1329 for DFmode copies */
1330 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1331 | m_GENERIC | m_GEODE),
1333 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1334 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1336 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1337 conflict here in between PPro/Pentium4 based chips that thread 128bit
1338 SSE registers as single units versus K8 based chips that divide SSE
1339 registers to two 64bit halves. This knob promotes all store destinations
1340 to be 128bit to allow register renaming on 128bit SSE units, but usually
1341 results in one extra microop on 64bit SSE units. Experimental results
1342 shows that disabling this option on P4 brings over 20% SPECfp regression,
1343 while enabling it on K8 brings roughly 2.4% regression that can be partly
1344 masked by careful scheduling of moves. */
1345 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1347 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1350 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1351 are resolved on SSE register parts instead of whole registers, so we may
1352 maintain just lower part of scalar values in proper format leaving the
1353 upper part undefined. */
1356 /* X86_TUNE_SSE_TYPELESS_STORES */
1359 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1360 m_PPRO | m_PENT4 | m_NOCONA,
1362 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1363 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1365 /* X86_TUNE_PROLOGUE_USING_MOVE */
1366 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1368 /* X86_TUNE_EPILOGUE_USING_MOVE */
1369 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1371 /* X86_TUNE_SHIFT1 */
1374 /* X86_TUNE_USE_FFREEP */
1377 /* X86_TUNE_INTER_UNIT_MOVES */
1378 ~(m_AMD_MULTIPLE | m_GENERIC),
1380 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1383 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1384 than 4 branch instructions in the 16 byte window. */
1385 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1387 /* X86_TUNE_SCHEDULE */
1388 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1390 /* X86_TUNE_USE_BT */
1393 /* X86_TUNE_USE_INCDEC */
1394 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1396 /* X86_TUNE_PAD_RETURNS */
1397 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1399 /* X86_TUNE_EXT_80387_CONSTANTS */
1400 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SHORTEN_X87_SSE */
1405 /* X86_TUNE_AVOID_VECTOR_DECODE */
1408 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1409 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1412 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1413 vector path on AMD machines. */
1414 m_K8 | m_GENERIC64 | m_AMDFAM10,
1416 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1418 m_K8 | m_GENERIC64 | m_AMDFAM10,
1420 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1424 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1425 but one byte longer. */
1428 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1429 operand that cannot be represented using a modRM byte. The XOR
1430 replacement is long decoded, so this split helps here as well. */
1433 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1434 from integer to FP. */
1438 /* Feature tests against the various architecture variations. */
/* Positionally indexed by X86_ARCH_*; NOTE(review): several mask-value
   lines and the closing brace of the array appear to be missing from
   this copy -- verify against the X86_ARCH_* enum.  */
1439 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1440 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1441 ~(m_386 | m_486 | m_PENT | m_K6),
1443 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1446 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1449 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1452 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPUs for which outgoing argument space is accumulated rather than
   pushed/popped per call.  */
1456 static const unsigned int x86_accumulate_outgoing_args
1457 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* CPUs where the 387 transcendental instructions are always usable.  */
1459 static const unsigned int x86_arch_always_fancy_math_387
1460 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1461 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm forced by the user; no_stringop means
   "choose automatically from the cost tables".  */
1463 static enum stringop_alg stringop_alg = no_stringop;
1465 /* In case the average insn count for single function invocation is
1466 lower than this constant, emit fast (but longer) prologue and
1468 #define FAST_PROLOGUE_INSN_COUNT 20
1470 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1471 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1472 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1473 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1475 /* Array of the smallest class containing reg number REGNO, indexed by
1476 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): the opening/closing braces of this and the following
   array initializers appear to have been dropped from this copy of the
   file -- verify before editing.  */
1478 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1480 /* ax, dx, cx, bx */
1481 AREG, DREG, CREG, BREG,
1482 /* si, di, bp, sp */
1483 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1485 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1486 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1489 /* flags, fpsr, fpcr, frame */
1490 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1492 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1495 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1498 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1500 /* SSE REX registers */
1501 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1505 /* The "default" register map used in 32bit mode. */
/* Maps gcc hard-register numbers to DBX/stabs register numbers;
   -1 means "no debug-register number".  */
1507 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1509 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1510 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1511 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1512 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1513 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1515 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* x86-64 ABI integer argument registers, in argument-passing order.  */
1518 static int const x86_64_int_parameter_registers[6] =
1520 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1521 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Microsoft x64 ABI integer argument registers (only four).  */
1524 static int const x86_64_ms_abi_int_parameter_registers[4] =
1526 2 /*RCX*/, 1 /*RDX*/,
1527 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* x86-64 ABI integer value-return registers.  */
1530 static int const x86_64_int_return_registers[4] =
1532 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1535 /* The "default" register map used in 64bit mode. */
1536 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1538 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1539 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1540 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1541 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1542 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1543 8,9,10,11,12,13,14,15, /* extended integer registers */
1544 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1547 /* Define the register numbers to be used in Dwarf debugging information.
1548 The SVR4 reference port C compiler uses the following register numbers
1549 in its Dwarf output code:
1550 0 for %eax (gcc regno = 0)
1551 1 for %ecx (gcc regno = 2)
1552 2 for %edx (gcc regno = 1)
1553 3 for %ebx (gcc regno = 3)
1554 4 for %esp (gcc regno = 7)
1555 5 for %ebp (gcc regno = 6)
1556 6 for %esi (gcc regno = 4)
1557 7 for %edi (gcc regno = 5)
1558 The following three DWARF register numbers are never generated by
1559 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1560 believes these numbers have these meanings.
1561 8 for %eip (no gcc equivalent)
1562 9 for %eflags (gcc regno = 17)
1563 10 for %trapno (no gcc equivalent)
1564 It is not at all clear how we should number the FP stack registers
1565 for the x86 architecture. If the version of SDB on x86/svr4 were
1566 a bit less brain dead with respect to floating-point then we would
1567 have a precedent to follow with respect to DWARF register numbers
1568 for x86 FP registers, but the SDB on x86/svr4 is so completely
1569 broken with respect to FP registers that it is hardly worth thinking
1570 of it as something to strive for compatibility with.
1571 The version of x86/svr4 SDB I have at the moment does (partially)
1572 seem to believe that DWARF register number 11 is associated with
1573 the x86 register %st(0), but that's about all. Higher DWARF
1574 register numbers don't seem to be associated with anything in
1575 particular, and even for DWARF regno 11, SDB only seems to under-
1576 stand that it should say that a variable lives in %st(0) (when
1577 asked via an `=' command) if we said it was in DWARF regno 11,
1578 but SDB still prints garbage when asked for the value of the
1579 variable in question (via a `/' command).
1580 (Also note that the labels SDB prints for various FP stack regs
1581 when doing an `x' command are all wrong.)
1582 Note that these problems generally don't affect the native SVR4
1583 C compiler because it doesn't allow the use of -O with -g and
1584 because when it is *not* optimizing, it allocates a memory
1585 location for each floating-point variable, and the memory
1586 location is what gets described in the DWARF AT_location
1587 attribute for the variable in question.
1588 Regardless of the severe mental illness of the x86/svr4 SDB, we
1589 do something sensible here and we use the following DWARF
1590 register numbers. Note that these are all stack-top-relative
1592 11 for %st(0) (gcc regno = 8)
1593 12 for %st(1) (gcc regno = 9)
1594 13 for %st(2) (gcc regno = 10)
1595 14 for %st(3) (gcc regno = 11)
1596 15 for %st(4) (gcc regno = 12)
1597 16 for %st(5) (gcc regno = 13)
1598 17 for %st(6) (gcc regno = 14)
1599 18 for %st(7) (gcc regno = 15)
/* Maps gcc hard-register numbers to the SVR4 DWARF register numbers
   described in the long comment above; -1 means "no number".
   NOTE(review): the opening/closing braces of the initializer appear to
   be missing from this copy -- verify before editing.  */
1601 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1603 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1604 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1605 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1606 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1607 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1609 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1612 /* Test and compare insns in i386.md store the information needed to
1613 generate branch and scc insns here. */
1615 rtx ix86_compare_op0 = NULL_RTX;
1616 rtx ix86_compare_op1 = NULL_RTX;
1617 rtx ix86_compare_emitted = NULL_RTX;
1619 /* Size of the register save area. */
1620 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1622 /* Define the structure for the machine field in struct function. */
/* NOTE(review): several member declarations of this struct appear to be
   missing from this copy (e.g. the braces and the rtl field between
   `mode' and `next') -- verify.  */
1624 struct stack_local_entry GTY(())
1626 unsigned short mode;
1629 struct stack_local_entry *next;
1632 /* Structure describing stack frame layout.
1633 Stack grows downward:
1639 saved frame pointer if frame_pointer_needed
1640 <- HARD_FRAME_POINTER
1645 [va_arg registers] (
1646 > to_allocate <- FRAME_POINTER
/* NOTE(review): the `struct ix86_frame {' line and several member
   declarations appear to be missing from this copy -- verify.  */
1656 HOST_WIDE_INT frame;
1658 int outgoing_arguments_size;
1661 HOST_WIDE_INT to_allocate;
1662 /* The offsets relative to ARG_POINTER. */
1663 HOST_WIDE_INT frame_pointer_offset;
1664 HOST_WIDE_INT hard_frame_pointer_offset;
1665 HOST_WIDE_INT stack_pointer_offset;
1667 /* When save_regs_using_mov is set, emit prologue using
1668 move instead of push instructions. */
1669 bool save_regs_using_mov;
/* Global option/state variables for the i386 back end; presumably
   initialized during override_options -- TODO confirm.  */
1672 /* Code model option. */
1673 enum cmodel ix86_cmodel;
1675 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1677 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1679 /* Which unit we are generating floating point math for. */
1680 enum fpmath_unit ix86_fpmath;
1682 /* Which cpu are we scheduling for. */
1683 enum processor_type ix86_tune;
1685 /* Which instruction set architecture to use. */
1686 enum processor_type ix86_arch;
1688 /* true if sse prefetch instruction is not NOOP. */
1689 int x86_prefetch_sse;
1691 /* ix86_regparm_string as a number */
1692 static int ix86_regparm;
1694 /* -mstackrealign option */
1695 extern int ix86_force_align_arg_pointer;
1696 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1698 /* Preferred alignment for stack boundary in bits. */
1699 unsigned int ix86_preferred_stack_boundary;
1701 /* Values 1-5: see jump.c */
1702 int ix86_branch_cost;
1704 /* Variables which are this size or smaller are put in the data/bss
1705 or ldata/lbss sections. */
1707 int ix86_section_threshold = 65536;
1709 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1710 char internal_label_prefix[16];
1711 int internal_label_prefix_len;
1713 /* Fence to use after loop using movnt. */
1716 /* Register class used for passing given 64bit part of the argument.
1717 These represent classes as documented by the PS ABI, with the exception
1718 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1719 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1721 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1722 whenever possible (upper half does contain padding). */
1723 enum x86_64_reg_class
1726 X86_64_INTEGER_CLASS,
1727 X86_64_INTEGERSI_CLASS,
1734 X86_64_COMPLEX_X87_CLASS,
1737 static const char * const x86_64_reg_class_name[] =
1739 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1740 "sseup", "x87", "x87up", "cplx87", "no"
1743 #define MAX_CLASSES 4
1745 /* Table of constants used by fldpi, fldln2, etc.... */
1746 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1747 static bool ext_80387_constants_init = 0;
/* Forward declarations for helpers defined later in this file.  */
1750 static struct machine_function * ix86_init_machine_status (void);
1751 static rtx ix86_function_value (const_tree, const_tree, bool);
1752 static int ix86_function_regparm (const_tree, const_tree);
1753 static void ix86_compute_frame_layout (struct ix86_frame *);
1754 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1758 /* The svr4 ABI for the i386 says that records and unions are returned
1760 #ifndef DEFAULT_PCC_STRUCT_RETURN
1761 #define DEFAULT_PCC_STRUCT_RETURN 1
1764 /* Bit flags that specify the ISA we are compiling for. */
1765 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1767 /* A mask of ix86_isa_flags that includes bit X if X
1768 was set or cleared on the command line. */
1769 static int ix86_isa_flags_explicit;
1771 /* Define a set of ISAs which are available when a given ISA is
1772 enabled. MMX and SSE ISAs are handled separately. */
/* Each FOO_SET mask also pulls in the masks FOO depends on, so enabling
   -mfoo transitively enables its prerequisites.  */
1774 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1775 #define OPTION_MASK_ISA_3DNOW_SET \
1776 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1778 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1779 #define OPTION_MASK_ISA_SSE2_SET \
1780 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1781 #define OPTION_MASK_ISA_SSE3_SET \
1782 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1783 #define OPTION_MASK_ISA_SSSE3_SET \
1784 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1785 #define OPTION_MASK_ISA_SSE4_1_SET \
1786 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1787 #define OPTION_MASK_ISA_SSE4_2_SET \
1788 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1790 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1792 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1794 #define OPTION_MASK_ISA_SSE4A_SET \
1795 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1796 #define OPTION_MASK_ISA_SSE5_SET \
1797 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1799 /* Define a set of ISAs which aren't available when a given ISA is
1800 disabled. MMX and SSE ISAs are handled separately. */
/* Dually, each FOO_UNSET mask also pulls in the masks that depend on
   FOO, so -mno-foo transitively disables its dependents.  */
1802 #define OPTION_MASK_ISA_MMX_UNSET \
1803 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1804 #define OPTION_MASK_ISA_3DNOW_UNSET \
1805 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1806 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1808 #define OPTION_MASK_ISA_SSE_UNSET \
1809 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1810 #define OPTION_MASK_ISA_SSE2_UNSET \
1811 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1812 #define OPTION_MASK_ISA_SSE3_UNSET \
1813 (OPTION_MASK_ISA_SSE3 \
1814 | OPTION_MASK_ISA_SSSE3_UNSET \
1815 | OPTION_MASK_ISA_SSE4A_UNSET )
1816 #define OPTION_MASK_ISA_SSSE3_UNSET \
1817 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1818 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1819 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1820 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1822 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1824 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1826 #define OPTION_MASK_ISA_SSE4A_UNSET \
1827 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1829 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1831 /* Vectorization library interface and handlers. */
/* Hook installed by -mveclibabi=; NULL means no external vector library.  */
1832 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1833 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1835 /* Implement TARGET_HANDLE_OPTION. */
/* Dispatches on the option CODE.  Every -mFOO arm ORs the FOO_SET mask
   (FOO plus its prerequisites) into ix86_isa_flags, every -mno-FOO arm
   clears the FOO_UNSET mask (FOO plus its dependents), and both record
   the touched bits in ix86_isa_flags_explicit so later defaulting code
   never overrides an explicit user choice.  */
1838 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1845 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1850 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1851 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1858 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1859 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1863 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1864 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1874 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1875 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1879 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1880 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1887 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1892 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1900 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1901 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1905 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1906 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1913 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1914 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1918 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1919 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1926 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1927 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1931 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1932 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1939 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1940 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1944 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1945 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1950 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1951 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1955 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1956 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1962 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1963 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1967 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1968 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1975 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1976 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1980 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1981 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1990 /* Sometimes certain combinations of command options do not make
1991 sense on a particular target machine. You can define a macro
1992 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1993 defined, is executed once just after all the command options have
1996 Don't use this macro to turn on various extra optimizations for
1997 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2000 override_options (void)
2003 int ix86_tune_defaulted = 0;
2004 int ix86_arch_specified = 0;
2005 unsigned int ix86_arch_mask, ix86_tune_mask;
2007 /* Comes from final.c -- no real reason to change it. */
2008 #define MAX_CODE_ALIGN 16
/* Per-processor cost table and default code-alignment parameters.  */
2012 const struct processor_costs *cost; /* Processor costs */
2013 const int align_loop; /* Default alignments. */
2014 const int align_loop_max_skip;
2015 const int align_jump;
2016 const int align_jump_max_skip;
2017 const int align_func;
/* NOTE(review): row order must match the PROCESSOR_* enum values, since
   the table is indexed directly by ix86_tune below.  */
2019 const processor_target_table[PROCESSOR_max] =
2021 {&i386_cost, 4, 3, 4, 3, 4},
2022 {&i486_cost, 16, 15, 16, 15, 16},
2023 {&pentium_cost, 16, 7, 16, 7, 16},
2024 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2025 {&geode_cost, 0, 0, 0, 0, 0},
2026 {&k6_cost, 32, 7, 32, 7, 32},
2027 {&athlon_cost, 16, 7, 16, 7, 16},
2028 {&pentium4_cost, 0, 0, 0, 0, 0},
2029 {&k8_cost, 16, 7, 16, 7, 16},
2030 {&nocona_cost, 0, 0, 0, 0, 0},
2031 {&core2_cost, 16, 10, 16, 10, 16},
2032 {&generic32_cost, 16, 7, 16, 7, 16},
2033 {&generic64_cost, 16, 10, 16, 10, 16},
2034 {&amdfam10_cost, 32, 24, 32, 7, 32}
/* Maps TARGET_CPU_DEFAULT_* back to the -march/-mtune spelling.  */
2037 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Feature bits (pta_flags) describing what each -march alias implies.  */
2068 PTA_PREFETCH_SSE = 1 << 4,
2070 PTA_3DNOW_A = 1 << 6,
2074 PTA_POPCNT = 1 << 10,
2076 PTA_SSE4A = 1 << 12,
2077 PTA_NO_SAHF = 1 << 13,
2078 PTA_SSE4_1 = 1 << 14,
2079 PTA_SSE4_2 = 1 << 15,
2085 const char *const name; /* processor name or nickname. */
2086 const enum processor_type processor;
2087 const unsigned /*enum pta_flags*/ flags;
/* One row per recognized -march=/-mtune= CPU name; scanned linearly.  */
2089 const processor_alias_table[] =
2091 {"i386", PROCESSOR_I386, 0},
2092 {"i486", PROCESSOR_I486, 0},
2093 {"i586", PROCESSOR_PENTIUM, 0},
2094 {"pentium", PROCESSOR_PENTIUM, 0},
2095 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2096 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2097 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2098 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2099 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2100 {"i686", PROCESSOR_PENTIUMPRO, 0},
2101 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2102 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2103 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2104 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2105 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2106 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2107 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2108 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2109 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2110 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2111 | PTA_CX16 | PTA_NO_SAHF)},
2112 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2113 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2116 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2117 |PTA_PREFETCH_SSE)},
2118 {"k6", PROCESSOR_K6, PTA_MMX},
2119 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2120 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2121 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2122 | PTA_PREFETCH_SSE)},
2123 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2124 | PTA_PREFETCH_SSE)},
2125 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2127 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2129 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2131 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2132 | PTA_MMX | PTA_SSE | PTA_SSE2
2134 {"k8", PROCESSOR_K8, (PTA_64BIT
2135 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2136 | PTA_SSE | PTA_SSE2
2138 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2139 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2140 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2142 {"opteron", PROCESSOR_K8, (PTA_64BIT
2143 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2144 | PTA_SSE | PTA_SSE2
2146 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2147 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2148 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2150 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2151 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2152 | PTA_SSE | PTA_SSE2
2154 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2155 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2156 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2158 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2159 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2160 | PTA_SSE | PTA_SSE2
2162 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2163 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2164 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2166 | PTA_CX16 | PTA_ABM)},
2167 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2168 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2169 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2171 | PTA_CX16 | PTA_ABM)},
2172 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2173 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2176 int const pta_size = ARRAY_SIZE (processor_alias_table);
/* Give the sub/subsub-target a chance to override options first.  */
2178 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2179 SUBTARGET_OVERRIDE_OPTIONS;
2182 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2183 SUBSUBTARGET_OVERRIDE_OPTIONS;
2186 /* -fPIC is the default for x86_64. */
2187 if (TARGET_MACHO && TARGET_64BIT)
2190 /* Set the default values for switches whose default depends on TARGET_64BIT
2191 in case they weren't overwritten by command line options. */
/* The value 2 means "not set on the command line" for these flags.  */
2194 /* Mach-O doesn't support omitting the frame pointer for now. */
2195 if (flag_omit_frame_pointer == 2)
2196 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2197 if (flag_asynchronous_unwind_tables == 2)
2198 flag_asynchronous_unwind_tables = 1;
2199 if (flag_pcc_struct_return == 2)
2200 flag_pcc_struct_return = 0;
2204 if (flag_omit_frame_pointer == 2)
2205 flag_omit_frame_pointer = 0;
2206 if (flag_asynchronous_unwind_tables == 2)
2207 flag_asynchronous_unwind_tables = 0;
2208 if (flag_pcc_struct_return == 2)
2209 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2212 /* Need to check -mtune=generic first. */
2213 if (ix86_tune_string)
2215 if (!strcmp (ix86_tune_string, "generic")
2216 || !strcmp (ix86_tune_string, "i686")
2217 /* As special support for cross compilers we read -mtune=native
2218 as -mtune=generic. With native compilers we won't see the
2219 -mtune=native, as it was changed by the driver. */
2220 || !strcmp (ix86_tune_string, "native"))
2223 ix86_tune_string = "generic64";
2225 ix86_tune_string = "generic32";
/* Reject "generic32"/"generic64" etc. spelled out by the user.  */
2227 else if (!strncmp (ix86_tune_string, "generic", 7))
2228 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
/* No -mtune given: fall back to -march, then to the configured default.  */
2232 if (ix86_arch_string)
2233 ix86_tune_string = ix86_arch_string;
2234 if (!ix86_tune_string)
2236 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2237 ix86_tune_defaulted = 1;
2240 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2241 need to use a sensible tune option. */
2242 if (!strcmp (ix86_tune_string, "generic")
2243 || !strcmp (ix86_tune_string, "x86-64")
2244 || !strcmp (ix86_tune_string, "i686"))
2247 ix86_tune_string = "generic64";
2249 ix86_tune_string = "generic32";
/* Parse -mstringop-strategy= into the stringop_alg enum.  */
2252 if (ix86_stringop_string)
2254 if (!strcmp (ix86_stringop_string, "rep_byte"))
2255 stringop_alg = rep_prefix_1_byte;
2256 else if (!strcmp (ix86_stringop_string, "libcall"))
2257 stringop_alg = libcall;
2258 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2259 stringop_alg = rep_prefix_4_byte;
2260 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2261 stringop_alg = rep_prefix_8_byte;
2262 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2263 stringop_alg = loop_1_byte;
2264 else if (!strcmp (ix86_stringop_string, "loop"))
2265 stringop_alg = loop;
2266 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2267 stringop_alg = unrolled_loop;
2269 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2271 if (!strcmp (ix86_tune_string, "x86-64"))
2272 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2273 "-mtune=generic instead as appropriate.");
2275 if (!ix86_arch_string)
2276 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2278 ix86_arch_specified = 1;
2280 if (!strcmp (ix86_arch_string, "generic"))
2281 error ("generic CPU can be used only for -mtune= switch");
2282 if (!strncmp (ix86_arch_string, "generic", 7))
2283 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Parse -mcmodel=; the PIC-specific variants depend on flag_pic.  */
2285 if (ix86_cmodel_string != 0)
2287 if (!strcmp (ix86_cmodel_string, "small"))
2288 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2289 else if (!strcmp (ix86_cmodel_string, "medium"))
2290 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2291 else if (!strcmp (ix86_cmodel_string, "large"))
2292 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2294 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2295 else if (!strcmp (ix86_cmodel_string, "32"))
2296 ix86_cmodel = CM_32;
2297 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2298 ix86_cmodel = CM_KERNEL;
2300 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2304 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2305 use of rip-relative addressing. This eliminates fixups that
2306 would otherwise be needed if this object is to be placed in a
2307 DLL, and is essentially just as efficient as direct addressing. */
2308 if (TARGET_64BIT_MS_ABI)
2309 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2310 else if (TARGET_64BIT)
2311 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2313 ix86_cmodel = CM_32;
/* Parse -masm= (AT&T vs. Intel assembler syntax).  */
2315 if (ix86_asm_string != 0)
2318 && !strcmp (ix86_asm_string, "intel"))
2319 ix86_asm_dialect = ASM_INTEL;
2320 else if (!strcmp (ix86_asm_string, "att"))
2321 ix86_asm_dialect = ASM_ATT;
2323 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Cross-check the code model against the selected bitness.  */
2325 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2326 error ("code model %qs not supported in the %s bit mode",
2327 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2328 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2329 sorry ("%i-bit mode not compiled in",
2330 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
/* Resolve -march: find the alias-table row and turn its PTA_* feature
   bits into ISA flag bits, but only where the user did not already
   set/clear that ISA explicitly on the command line.  */
2332 for (i = 0; i < pta_size; i++)
2333 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2335 ix86_arch = processor_alias_table[i].processor;
2336 /* Default cpu tuning to the architecture. */
2337 ix86_tune = ix86_arch;
2339 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2340 error ("CPU you selected does not support x86-64 "
2343 if (processor_alias_table[i].flags & PTA_MMX
2344 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2345 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2346 if (processor_alias_table[i].flags & PTA_3DNOW
2347 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2348 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2349 if (processor_alias_table[i].flags & PTA_3DNOW_A
2350 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2351 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2352 if (processor_alias_table[i].flags & PTA_SSE
2353 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2354 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2355 if (processor_alias_table[i].flags & PTA_SSE2
2356 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2357 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2358 if (processor_alias_table[i].flags & PTA_SSE3
2359 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2360 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2361 if (processor_alias_table[i].flags & PTA_SSSE3
2362 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2363 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2364 if (processor_alias_table[i].flags & PTA_SSE4_1
2365 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2366 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2367 if (processor_alias_table[i].flags & PTA_SSE4_2
2368 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2369 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2370 if (processor_alias_table[i].flags & PTA_SSE4A
2371 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2372 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2373 if (processor_alias_table[i].flags & PTA_SSE5
2374 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2375 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2377 if (processor_alias_table[i].flags & PTA_ABM)
2379 if (processor_alias_table[i].flags & PTA_CX16)
2380 x86_cmpxchg16b = true;
2381 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2383 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2384 x86_prefetch_sse = true;
2385 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2392 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Mask the per-arch feature table down to the selected architecture.  */
2394 ix86_arch_mask = 1u << ix86_arch;
2395 for (i = 0; i < X86_ARCH_LAST; ++i)
2396 ix86_arch_features[i] &= ix86_arch_mask;
/* Resolve -mtune the same way; if the defaulted tune CPU cannot do
   x86-64, retry with "x86-64" rather than erroring out.  */
2398 for (i = 0; i < pta_size; i++)
2399 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2401 ix86_tune = processor_alias_table[i].processor;
2402 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2404 if (ix86_tune_defaulted)
2406 ix86_tune_string = "x86-64";
2407 for (i = 0; i < pta_size; i++)
2408 if (! strcmp (ix86_tune_string,
2409 processor_alias_table[i].name))
2411 ix86_tune = processor_alias_table[i].processor;
2414 error ("CPU you selected does not support x86-64 "
2417 /* Intel CPUs have always interpreted SSE prefetch instructions as
2418 NOPs; so, we can enable SSE prefetch instructions even when
2419 -mtune (rather than -march) points us to a processor that has them.
2420 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2421 higher processors. */
2423 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2424 x86_prefetch_sse = true;
2428 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
/* Mask the per-tune feature table down to the selected tune CPU.  */
2430 ix86_tune_mask = 1u << ix86_tune;
2431 for (i = 0; i < X86_TUNE_LAST; ++i)
2432 ix86_tune_features[i] &= ix86_tune_mask;
/* -Os tunes for size; otherwise use the tune CPU's cost model.  */
2435 ix86_cost = &size_cost;
2437 ix86_cost = processor_target_table[ix86_tune].cost;
2439 /* Arrange to set up i386_stack_locals for all functions. */
2440 init_machine_status = ix86_init_machine_status;
2442 /* Validate -mregparm= value. */
2443 if (ix86_regparm_string)
2446 warning (0, "-mregparm is ignored in 64-bit mode")
2447 i = atoi (ix86_regparm_string);
2448 if (i < 0 || i > REGPARM_MAX)
2449 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2454 ix86_regparm = REGPARM_MAX;
2456 /* If the user has provided any of the -malign-* options,
2457 warn and use that value only if -falign-* is not set.
2458 Remove this code in GCC 3.2 or later. */
/* NOTE(review): the removal note above is long stale; this deprecation
   shim is a candidate for deletion.  */
2459 if (ix86_align_loops_string)
2461 warning (0, "-malign-loops is obsolete, use -falign-loops");
2462 if (align_loops == 0)
2464 i = atoi (ix86_align_loops_string);
2465 if (i < 0 || i > MAX_CODE_ALIGN)
2466 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2468 align_loops = 1 << i;
2472 if (ix86_align_jumps_string)
2474 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2475 if (align_jumps == 0)
2477 i = atoi (ix86_align_jumps_string);
2478 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): copy/paste bug -- this is the -malign-jumps path but the
   diagnostic names -malign-loops; it should read "-malign-jumps=%d ...".  */
2479 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2481 align_jumps = 1 << i;
2485 if (ix86_align_funcs_string)
2487 warning (0, "-malign-functions is obsolete, use -falign-functions");
2488 if (align_functions == 0)
2490 i = atoi (ix86_align_funcs_string);
2491 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): same copy/paste bug -- should name -malign-functions.  */
2492 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2494 align_functions = 1 << i;
2498 /* Default align_* from the processor table. */
2499 if (align_loops == 0)
2501 align_loops = processor_target_table[ix86_tune].align_loop;
2502 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2504 if (align_jumps == 0)
2506 align_jumps = processor_target_table[ix86_tune].align_jump;
2507 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2509 if (align_functions == 0)
2511 align_functions = processor_target_table[ix86_tune].align_func;
2514 /* Validate -mbranch-cost= value, or provide default. */
2515 ix86_branch_cost = ix86_cost->branch_cost;
2516 if (ix86_branch_cost_string)
2518 i = atoi (ix86_branch_cost_string);
2520 error ("-mbranch-cost=%d is not between 0 and 5", i);
2522 ix86_branch_cost = i;
/* Validate -mlarge-data-threshold= (size cutoff for .ldata/.lbss).  */
2524 if (ix86_section_threshold_string)
2526 i = atoi (ix86_section_threshold_string);
2528 error ("-mlarge-data-threshold=%d is negative", i);
2530 ix86_section_threshold = i;
/* Parse -mtls-dialect=.  */
2533 if (ix86_tls_dialect_string)
2535 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2536 ix86_tls_dialect = TLS_DIALECT_GNU;
2537 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2538 ix86_tls_dialect = TLS_DIALECT_GNU2;
2539 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2540 ix86_tls_dialect = TLS_DIALECT_SUN;
2542 error ("bad value (%s) for -mtls-dialect= switch",
2543 ix86_tls_dialect_string);
/* Validate -mpc32/-mpc64/-mpc80 x87 precision setting.  */
2546 if (ix87_precision_string)
2548 i = atoi (ix87_precision_string);
2549 if (i != 32 && i != 64 && i != 80)
2550 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
/* Apply the subtarget's 64-bit default flags where not set explicitly.  */
2555 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2557 /* Enable by default the SSE and MMX builtins. Do allow the user to
2558 explicitly disable any of these. In particular, disabling SSE and
2559 MMX for kernel code is extremely useful. */
2560 if (!ix86_arch_specified)
2562 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2563 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2566 warning (0, "-mrtd is ignored in 64bit mode");
/* 32-bit path: subtarget defaults, again honoring explicit settings.  */
2570 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2572 if (!ix86_arch_specified)
2574 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2576 /* i386 ABI does not specify red zone.  It still makes sense to use it
2577 when the programmer takes care to keep the stack from being destroyed. */
2578 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2579 target_flags |= MASK_NO_RED_ZONE;
2582 /* Keep nonleaf frame pointers. */
2583 if (flag_omit_frame_pointer)
2584 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2585 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2586 flag_omit_frame_pointer = 1;
2588 /* If we're doing fast math, we don't care about comparison order
2589 wrt NaNs. This lets us use a shorter comparison sequence. */
2590 if (flag_finite_math_only)
2591 target_flags &= ~MASK_IEEE_FP;
2593 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2594 since the insns won't need emulation. */
2595 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2596 target_flags &= ~MASK_NO_FANCY_MATH_387;
2598 /* Likewise, if the target doesn't have a 387, or we've specified
2599 software floating point, don't use 387 inline intrinsics. */
2601 target_flags |= MASK_NO_FANCY_MATH_387;
2603 /* Turn on MMX builtins for -msse. */
2606 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2607 x86_prefetch_sse = true;
2610 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2611 if (TARGET_SSE4_2 || TARGET_ABM)
2614 /* Validate -mpreferred-stack-boundary= value, or provide default.
2615 The default of 128 bits is for Pentium III's SSE __m128. We can't
2616 change it because of optimize_size. Otherwise, we can't mix object
2617 files compiled with -Os and -On. */
2618 ix86_preferred_stack_boundary = 128;
2619 if (ix86_preferred_stack_boundary_string)
/* The option value is log2 of the boundary in bytes.  */
2621 i = atoi (ix86_preferred_stack_boundary_string);
2622 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2623 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2624 TARGET_64BIT ? 4 : 2);
2626 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2629 /* Accept -msseregparm only if at least SSE support is enabled. */
2630 if (TARGET_SSEREGPARM
2632 error ("-msseregparm used without SSE enabled");
/* Parse -mfpmath=; fall back to 387 with a warning when SSE is off.  */
2634 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2635 if (ix86_fpmath_string != 0)
2637 if (! strcmp (ix86_fpmath_string, "387"))
2638 ix86_fpmath = FPMATH_387;
2639 else if (! strcmp (ix86_fpmath_string, "sse"))
2643 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2644 ix86_fpmath = FPMATH_387;
2647 ix86_fpmath = FPMATH_SSE;
2649 else if (! strcmp (ix86_fpmath_string, "387,sse")
2650 || ! strcmp (ix86_fpmath_string, "sse,387"))
2654 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2655 ix86_fpmath = FPMATH_387;
2657 else if (!TARGET_80387)
2659 warning (0, "387 instruction set disabled, using SSE arithmetics");
2660 ix86_fpmath = FPMATH_SSE;
2663 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2666 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2669 /* If the i387 is disabled, then do not return values in it. */
2671 target_flags &= ~MASK_FLOAT_RETURNS;
2673 /* Use external vectorized library in vectorizing intrinsics. */
2674 if (ix86_veclibabi_string)
2676 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2677 ix86_veclib_handler = ix86_veclibabi_acml;
2679 error ("unknown vectorization library ABI type (%s) for "
2680 "-mveclibabi= switch", ix86_veclibabi_string);
/* Enable accumulate-outgoing-args where the tune table favors it and
   the user has not decided either way.  */
2683 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2684 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2686 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2688 /* ??? Unwind info is not correct around the CFG unless either a frame
2689 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2690 unwind info generation to be aware of the CFG and propagating states
2692 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2693 || flag_exceptions || flag_non_call_exceptions)
2694 && flag_omit_frame_pointer
2695 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2697 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2698 warning (0, "unwind tables currently require either a frame pointer "
2699 "or -maccumulate-outgoing-args for correctness");
2700 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2703 /* For sane SSE instruction set generation we need fcomi instruction.
2704 It is safe to enable all CMOVE instructions. */
2708 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
/* Emit a probe label "LX...", then everything before the 'X' is the
   target's internal label prefix.  */
2711 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2712 p = strchr (internal_label_prefix, 'X');
2713 internal_label_prefix_len = p - internal_label_prefix;
2717 /* When scheduling description is not available, disable scheduler pass
2718 so it won't slow down the compilation and make x87 code slower. */
2719 if (!TARGET_SCHEDULE)
2720 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
/* Seed --param defaults from the cost model unless set by the user.  */
2722 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2723 set_param_value ("simultaneous-prefetches",
2724 ix86_cost->simultaneous_prefetches);
2725 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2726 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2727 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2728 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2729 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2730 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2732 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2733 can be optimized to ap = __builtin_next_arg (0). */
2734 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2735 targetm.expand_builtin_va_start = NULL;
2738 /* Return true if this goes in large data/bss. */
2741 ix86_in_large_data_p (tree exp)
/* Only the medium code models split data into normal and large sections.  */
2743 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2746 /* Functions are never large data. */
2747 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute of ".ldata"/".lbss" forces large data.  */
2750 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2752 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2753 if (strcmp (section, ".ldata") == 0
2754 || strcmp (section, ".lbss") == 0)
/* Otherwise classify by size against -mlarge-data-threshold.  */
2760 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2762 /* If this is an incomplete type with size 0, then we can't put it
2763 in data because it might be too big when completed. */
2764 if (!size || size > ix86_section_threshold)
2771 /* Switch to the appropriate section for output of DECL.
2772 DECL is either a `VAR_DECL' node or a constant of some sort.
2773 RELOC indicates whether forming the initial value of DECL requires
2774 link-time relocations. */
2776 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2780 x86_64_elf_select_section (tree decl, int reloc,
2781 unsigned HOST_WIDE_INT align)
/* Large data under the medium model goes into the .l* twin of the
   section it would normally land in; everything else falls through to
   the default ELF selector.  */
2783 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2784 && ix86_in_large_data_p (decl))
2786 const char *sname = NULL;
2787 unsigned int flags = SECTION_WRITE;
2788 switch (categorize_decl_for_section (decl, reloc))
2793 case SECCAT_DATA_REL:
2794 sname = ".ldata.rel";
2796 case SECCAT_DATA_REL_LOCAL:
2797 sname = ".ldata.rel.local";
2799 case SECCAT_DATA_REL_RO:
2800 sname = ".ldata.rel.ro";
2802 case SECCAT_DATA_REL_RO_LOCAL:
2803 sname = ".ldata.rel.ro.local";
2807 flags |= SECTION_BSS;
2810 case SECCAT_RODATA_MERGE_STR:
2811 case SECCAT_RODATA_MERGE_STR_INIT:
2812 case SECCAT_RODATA_MERGE_CONST:
2816 case SECCAT_SRODATA:
2823 /* We don't split these for medium model. Place them into
2824 default sections and hope for best. */
2829 /* We might get called with string constants, but get_named_section
2830 doesn't like them as they are not DECLs. Also, we need to set
2831 flags in that case. */
2833 return get_section (sname, flags, NULL);
2834 return get_named_section (decl, sname, reloc);
2837 return default_elf_select_section (decl, reloc, align);
2840 /* Build up a unique section name, expressed as a
2841 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2842 RELOC indicates whether the initial value of EXP requires
2843 link-time relocations. */
2845 static void ATTRIBUTE_UNUSED
2846 x86_64_elf_unique_section (tree decl, int reloc)
/* As in x86_64_elf_select_section, large data under the medium model
   gets the .l* / .gnu.linkonce.l* prefix; otherwise defer to the
   default unique-section logic.  */
2848 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2849 && ix86_in_large_data_p (decl))
2851 const char *prefix = NULL;
2852 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2853 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2855 switch (categorize_decl_for_section (decl, reloc))
2858 case SECCAT_DATA_REL:
2859 case SECCAT_DATA_REL_LOCAL:
2860 case SECCAT_DATA_REL_RO:
2861 case SECCAT_DATA_REL_RO_LOCAL:
2862 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2865 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2868 case SECCAT_RODATA_MERGE_STR:
2869 case SECCAT_RODATA_MERGE_STR_INIT:
2870 case SECCAT_RODATA_MERGE_CONST:
2871 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2873 case SECCAT_SRODATA:
2880 /* We don't split these for medium model. Place them into
2881 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into the section name.  */
2889 plen = strlen (prefix);
2891 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2892 name = targetm.strip_name_encoding (name);
2893 nlen = strlen (name);
2895 string = (char *) alloca (nlen + plen + 1);
2896 memcpy (string, prefix, plen);
2897 memcpy (string + plen, name, nlen + 1);
2899 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2903 default_unique_section (decl, reloc);
2906 #ifdef COMMON_ASM_OP
2907 /* This says how to output assembler code to declare an
2908 uninitialized external linkage data object.
2910 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): the signature line with the return type and the ALIGN
   parameter, plus the `else` before the COMMON_ASM_OP branch, are elided
   from this listing.  */
2913 x86_elf_aligned_common (FILE *file,
2914 const char *name, unsigned HOST_WIDE_INT size,
/* Large common objects in the medium code model need .largecomm so the
   linker can place them beyond 2GB.  */
2917 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2918 && size > (unsigned int)ix86_section_threshold)
2919 fprintf (file, ".largecomm\t");
2921 fprintf (file, "%s", COMMON_ASM_OP);
2922 assemble_name (file, name);
2923 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2924 size, align / BITS_PER_UNIT);
2928 /* Utility function for targets to use in implementing
2929 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): return type line and the `else` branches are elided from
   this listing.  Large BSS objects go to .lbss under the medium model;
   everything else goes to the normal bss section.  */
2932 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2933 const char *name, unsigned HOST_WIDE_INT size,
2936 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2937 && size > (unsigned int)ix86_section_threshold)
2938 switch_to_section (get_named_section (decl, ".lbss", 0));
2940 switch_to_section (bss_section);
2941 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2942 #ifdef ASM_DECLARE_OBJECT_NAME
2943 last_assemble_variable_decl = decl;
2944 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2946 /* Standard thing is just output label for the object. */
2947 ASM_OUTPUT_LABEL (file, name);
2948 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to real storage.  */
2949 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set default optimization-related flags for the i386 port.  Called once
   per -O level; LEVEL is the optimization level.  NOTE(review): the
   return type line and some guarding conditionals (e.g. the `if (level > 1)`
   around flag_schedule_insns and the TARGET_MACHO guard around
   flag_errno_math) are elided from this listing.  */
2953 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2955 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2956 make the problem with not enough registers even worse. */
2957 #ifdef INSN_SCHEDULING
2959 flag_schedule_insns = 0;
2963 /* The Darwin libraries never set errno, so we might as well
2964 avoid calling them when that's the only reason we would. */
2965 flag_errno_math = 0;
2967 /* The default values of these switches depend on the TARGET_64BIT
2968 that is not known at this moment. Mark these values with 2 and
2969 let user the to override these. In case there is no command line option
2970 specifying them, we will set the defaults in override_options. */
/* The sentinel value 2 means "not set by the user"; override_options
   replaces it with the real default once TARGET_64BIT is known.  */
2972 flag_omit_frame_pointer = 2;
2973 flag_pcc_struct_return = 2;
2974 flag_asynchronous_unwind_tables = 2;
2975 flag_vect_cost_model = 1;
2976 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2977 SUBTARGET_OPTIMIZATION_OPTIONS;
2981 /* Decide whether we can make a sibling call to a function. DECL is the
2982 declaration of the function being targeted by the call and EXP is the
2983 CALL_EXPR representing the call. */
/* NOTE(review): elided listing -- the `return false;`/`return true;`
   statements after each disqualifying test, and the local declarations
   of a/b/func/type, are not shown.  Each visible `if` guards an early
   rejection of the sibcall.  */
2986 ix86_function_ok_for_sibcall (tree decl, tree exp)
2991 /* If we are generating position-independent code, we cannot sibcall
2992 optimize any indirect call, or a direct call to a global function,
2993 as the PLT requires %ebx be live. */
2994 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3001 func = TREE_TYPE (CALL_EXPR_FN (exp));
3002 if (POINTER_TYPE_P (func))
3003 func = TREE_TYPE (func);
3006 /* Check that the return value locations are the same. Like
3007 if we are returning floats on the 80387 register stack, we cannot
3008 make a sibcall from a function that doesn't return a float to a
3009 function that does or, conversely, from a function that does return
3010 a float to a function that doesn't; the necessary stack adjustment
3011 would not be executed. This is also the place we notice
3012 differences in the return value ABI. Note that it is ok for one
3013 of the functions to have void return type as long as the return
3014 value of the other is passed in a register. */
3015 a = ix86_function_value (TREE_TYPE (exp), func, false);
3016 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3018 if (STACK_REG_P (a) || STACK_REG_P (b))
3020 if (!rtx_equal_p (a, b))
3023 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3025 else if (!rtx_equal_p (a, b))
3028 /* If this call is indirect, we'll need to be able to use a call-clobbered
3029 register for the address of the target function. Make sure that all
3030 such registers are not used for passing parameters. */
3031 if (!decl && !TARGET_64BIT)
3035 /* We're looking at the CALL_EXPR, we need the type of the function. */
3036 type = CALL_EXPR_FN (exp); /* pointer expression */
3037 type = TREE_TYPE (type); /* pointer type */
3038 type = TREE_TYPE (type); /* function type */
3040 if (ix86_function_regparm (type, NULL) >= 3)
3042 /* ??? Need to count the actual number of registers to be used,
3043 not the possible number of registers. Fix later. */
3048 /* Dllimport'd functions are also called indirectly. */
3049 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3050 && decl && DECL_DLLIMPORT_P (decl)
3051 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3054 /* If we forced aligned the stack, then sibcalling would unalign the
3055 stack, which may break the called function. */
3056 if (cfun->machine->force_align_arg_pointer)
3059 /* Otherwise okay. That also includes certain types of indirect calls. */
3063 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3064 calling convention attributes;
3065 arguments as in struct attribute_spec.handler. */
/* NOTE(review): elided listing -- the return type, the ARGS parameter,
   braces, `return NULL_TREE;` statements, and the 64-bit early-exit
   block are not shown between the visible lines.  */
3068 ix86_handle_cconv_attribute (tree *node, tree name,
3070 int flags ATTRIBUTE_UNUSED,
/* Reject the attribute on anything that is not a function type or
   declaration.  */
3073 if (TREE_CODE (*node) != FUNCTION_TYPE
3074 && TREE_CODE (*node) != METHOD_TYPE
3075 && TREE_CODE (*node) != FIELD_DECL
3076 && TREE_CODE (*node) != TYPE_DECL)
3078 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3079 IDENTIFIER_POINTER (name));
3080 *no_add_attrs = true;
3084 /* Can combine regparm with all attributes but fastcall. */
3085 if (is_attribute_p ("regparm", name))
3089 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3091 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: must be an INTEGER_CST no larger
   than REGPARM_MAX.  */
3094 cst = TREE_VALUE (args);
3095 if (TREE_CODE (cst) != INTEGER_CST)
3097 warning (OPT_Wattributes,
3098 "%qs attribute requires an integer constant argument",
3099 IDENTIFIER_POINTER (name));
3100 *no_add_attrs = true;
3102 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3104 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3105 IDENTIFIER_POINTER (name), REGPARM_MAX);
3106 *no_add_attrs = true;
/* Force-aligned-arg-pointer functions reserve one register for the
   realignment, so they only get REGPARM_MAX-1 register parameters.  */
3110 && lookup_attribute (ix86_force_align_arg_pointer_string,
3111 TYPE_ATTRIBUTES (*node))
3112 && compare_tree_int (cst, REGPARM_MAX-1))
3114 error ("%s functions limited to %d register parameters",
3115 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3123 /* Do not warn when emulating the MS ABI. */
3124 if (!TARGET_64BIT_MS_ABI)
3125 warning (OPT_Wattributes, "%qs attribute ignored",
3126 IDENTIFIER_POINTER (name));
3127 *no_add_attrs = true;
3131 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3132 if (is_attribute_p ("fastcall", name))
3134 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3136 error ("fastcall and cdecl attributes are not compatible");
3138 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3140 error ("fastcall and stdcall attributes are not compatible");
3142 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3144 error ("fastcall and regparm attributes are not compatible");
3148 /* Can combine stdcall with fastcall (redundant), regparm and
3150 else if (is_attribute_p ("stdcall", name))
3152 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3154 error ("stdcall and cdecl attributes are not compatible");
3156 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3158 error ("stdcall and fastcall attributes are not compatible");
3162 /* Can combine cdecl with regparm and sseregparm. */
3163 else if (is_attribute_p ("cdecl", name))
3165 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3167 error ("stdcall and cdecl attributes are not compatible");
3169 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3171 error ("fastcall and cdecl attributes are not compatible");
3175 /* Can combine sseregparm with all attributes. */
3180 /* Return 0 if the attributes for two types are incompatible, 1 if they
3181 are compatible, and 2 if they are nearly compatible (which causes a
3182 warning to be generated). */
3185 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3187 /* Check for mismatch of non-default calling convention. */
3188 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3190 if (TREE_CODE (type1) != FUNCTION_TYPE
3191 && TREE_CODE (type1) != METHOD_TYPE)
3194 /* Check for mismatched fastcall/regparm types. */
3195 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3196 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3197 || (ix86_function_regparm (type1, NULL)
3198 != ix86_function_regparm (type2, NULL)))
3201 /* Check for mismatched sseregparm types. */
3202 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3203 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3206 /* Check for mismatched return types (cdecl vs stdcall). */
3207 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3208 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3214 /* Return the regparm value for a function with the indicated TYPE and DECL.
3215 DECL may be NULL when calling function indirectly
3216 or considering a libcall. */
/* NOTE(review): elided listing -- the TARGET_64BIT early return, the
   `if (i && i->local)` guard around the local-function path, braces and
   several declarations are not shown between the visible lines.  */
3219 ix86_function_regparm (const_tree type, const_tree decl)
3222 int regparm = ix86_regparm;
/* An explicit regparm attribute overrides the command-line default.  */
3227 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3229 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
/* fastcall implies 2 register parameters (ecx/edx).  */
3231 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3234 /* Use register calling convention for local functions when possible. */
3235 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3236 && flag_unit_at_a_time && !profile_flag)
3238 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3239 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3242 int local_regparm, globals = 0, regno;
3245 /* Make sure no regparm register is taken by a
3246 fixed register variable. */
3247 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3248 if (fixed_regs[local_regparm])
3251 /* We can't use regparm(3) for nested functions as these use
3252 static chain pointer in third argument. */
3253 if (local_regparm == 3
3254 && (decl_function_context (decl)
3255 || ix86_force_align_arg_pointer)
3256 && !DECL_NO_STATIC_CHAIN (decl))
3259 /* If the function realigns its stackpointer, the prologue will
3260 clobber %ecx. If we've already generated code for the callee,
3261 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3262 scanning the attributes for the self-realigning property. */
3263 f = DECL_STRUCT_FUNCTION (decl);
3264 if (local_regparm == 3
3265 && (f ? !!f->machine->force_align_arg_pointer
3266 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3267 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3270 /* Each fixed register usage increases register pressure,
3271 so less registers should be used for argument passing.
3272 This functionality can be overriden by an explicit
3274 for (regno = 0; regno <= DI_REG; regno++)
3275 if (fixed_regs[regno])
/* Deduct one argument register per globally-fixed register, never
   going below zero.  */
3279 = globals < local_regparm ? local_regparm - globals : 0;
3281 if (local_regparm > regparm)
3282 regparm = local_regparm;
3289 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3290 DFmode (2) arguments in SSE registers for a function with the
3291 indicated TYPE and DECL. DECL may be NULL when calling function
3292 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): elided listing -- the TARGET_SSE guard, the WARN check
   around the error calls, the return statements and the final `return 0;`
   are not shown between the visible lines.  */
3295 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3297 gcc_assert (!TARGET_64BIT);
3299 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3300 by the sseregparm attribute. */
3301 if (TARGET_SSEREGPARM
3302 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* The attribute is an error rather than a silent no-op when SSE is
   unavailable; the %qD form is used when a decl is known, %qT otherwise.  */
3309 error ("Calling %qD with attribute sseregparm without "
3310 "SSE/SSE2 enabled", decl);
3312 error ("Calling %qT with attribute sseregparm without "
3313 "SSE/SSE2 enabled", type);
3321 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3322 (and DFmode for SSE2) arguments in SSE registers. */
3323 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3325 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3326 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3328 return TARGET_SSE2 ? 2 : 1;
3334 /* Return true if EAX is live at the start of the function. Used by
3335 ix86_expand_prologue to determine if we need special help before
3336 calling allocate_stack_worker. */
3339 ix86_eax_live_at_start_p (void)
3341 /* Cheat. Don't bother working forward from ix86_function_regparm
3342 to the function type to whether an actual argument is located in
3343 eax. Instead just look at cfg info, which is still close enough
3344 to correct at this point. This gives false positives for broken
3345 functions that might use uninitialized data that happens to be
3346 allocated in eax, but who cares? */
3347 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3350 /* Value is the number of bytes of arguments automatically
3351 popped when returning from a subroutine call.
3352 FUNDECL is the declaration node of the function (as a tree),
3353 FUNTYPE is the data type of the function (as a tree),
3354 or for a library call it is an identifier node for the subroutine name.
3355 SIZE is the number of bytes of arguments passed on the stack.
3357 On the 80386, the RTD insn may be used to pop them if the number
3358 of args is fixed, but if the number is variable then the caller
3359 must pop them all. RTD can't be used for library calls now
3360 because the library is compiled with the Unix compiler.
3361 Use of RTD is a selectable option, since it is incompatible with
3362 standard Unix calling sequences. If the option is not selected,
3363 the caller must always pop the args.
3365 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): elided listing -- the `int rtd;` declaration, the 64-bit
   `return 0;`, the `return size;` statements and the final `return 0;`
   are not shown between the visible lines.  */
3368 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3372 /* None of the 64-bit ABIs pop arguments. */
3376 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3378 /* Cdecl functions override -mrtd, and never pop the stack. */
3379 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3381 /* Stdcall and fastcall functions will pop the stack if not
3383 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3384 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* With -mrtd, only non-varargs functions pop their own arguments.  */
3387 if (rtd && ! stdarg_p (funtype))
3391 /* Lose any fake structure return argument if it is passed on the stack. */
3392 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3393 && !KEEP_AGGREGATE_RETURN_POINTER)
3395 int nregs = ix86_function_regparm (funtype, fundecl);
3397 return GET_MODE_SIZE (Pmode);
3403 /* Argument support functions. */
3405 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): elided listing -- the return type, the `int i;`
   declaration, the TARGET_64BIT branch structure and several `return`
   statements are not shown between the visible lines.  The first two
   return-expressions cover the 32-bit SSE/MMX cases; the remainder is
   the 64-bit path.  */
3407 ix86_function_arg_regno_p (int regno)
3410 const int *parm_regs;
3415 return (regno < REGPARM_MAX
3416 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3418 return (regno < REGPARM_MAX
3419 || (TARGET_MMX && MMX_REGNO_P (regno)
3420 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3421 || (TARGET_SSE && SSE_REGNO_P (regno)
3422 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3427 if (SSE_REGNO_P (regno) && TARGET_SSE)
3432 if (TARGET_SSE && SSE_REGNO_P (regno)
3433 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3437 /* RAX is used as hidden argument to va_arg functions. */
3438 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
/* Pick the integer-parameter register table for the active 64-bit ABI.  */
3441 if (TARGET_64BIT_MS_ABI)
3442 parm_regs = x86_64_ms_abi_int_parameter_registers;
3444 parm_regs = x86_64_int_parameter_registers;
3445 for (i = 0; i < REGPARM_MAX; i++)
3446 if (regno == parm_regs[i])
3451 /* Return if we do not know how to pass TYPE solely in registers. */
3454 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3456 if (must_pass_in_stack_var_size_or_pad (mode, type))
3459 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3460 The layout_type routine is crafty and tries to trick us into passing
3461 currently unsupported vector types on the stack by using TImode. */
3462 return (!TARGET_64BIT && mode == TImode
3463 && type && TREE_CODE (type) != VECTOR_TYPE);
3466 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3467 for a call to a function whose data type is FNTYPE.
3468 For a library call, FNTYPE is 0. */
/* NOTE(review): elided listing -- the FNDECL parameter line, the
   TARGET_64BIT branch structure and several closing braces are not shown
   between the visible lines.  */
3471 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3472 tree fntype, /* tree ptr for function decl */
3473 rtx libname, /* SYMBOL_REF of library name or 0 */
3476 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3477 memset (cum, 0, sizeof (*cum));
3479 /* Set up the number of registers to use for passing arguments. */
3480 cum->nregs = ix86_regparm;
3482 cum->sse_nregs = SSE_REGPARM_MAX;
3484 cum->mmx_nregs = MMX_REGPARM_MAX;
3485 cum->warn_sse = true;
3486 cum->warn_mmx = true;
3488 /* Because type might mismatch in between caller and callee, we need to
3489 use actual type of function for local calls.
3490 FIXME: cgraph_analyze can be told to actually record if function uses
3491 va_start so for local functions maybe_vaarg can be made aggressive
3493 FIXME: once typesytem is fixed, we won't need this code anymore. */
3495 fntype = TREE_TYPE (fndecl);
3496 cum->maybe_vaarg = (fntype
3497 ? (!prototype_p (fntype) || stdarg_p (fntype))
3502 /* If there are variable arguments, then we won't pass anything
3503 in registers in 32-bit mode. */
3504 if (cum->maybe_vaarg)
3514 /* Use ecx and edx registers if function has fastcall attribute,
3515 else look for regparm information. */
3518 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3524 cum->nregs = ix86_function_regparm (fntype, fndecl);
3527 /* Set up the number of SSE registers used for passing SFmode
3528 and DFmode arguments. Warn for mismatching ABI. */
3529 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3533 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3534 But in the case of vector types, it is some vector mode.
3536 When we have only some of our vector isa extensions enabled, then there
3537 are some modes for which vector_mode_supported_p is false. For these
3538 modes, the generic vector support in gcc will choose some non-vector mode
3539 in order to implement the type. By computing the natural mode, we'll
3540 select the proper ABI location for the operand and not depend on whatever
3541 the middle-end decides to do with these vector types. */
/* NOTE(review): elided listing -- the gcc_assert after the search loop
   and the final `return mode;` are not shown.  */
3543 static enum machine_mode
3544 type_natural_mode (const_tree type)
3546 enum machine_mode mode = TYPE_MODE (type);
3548 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3550 HOST_WIDE_INT size = int_size_in_bytes (type);
3551 if ((size == 8 || size == 16)
3552 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3553 && TYPE_VECTOR_SUBPARTS (type) > 1)
3555 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the linear search from the first vector mode of the right
   class (float vs int element type).  */
3557 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3558 mode = MIN_MODE_VECTOR_FLOAT;
3560 mode = MIN_MODE_VECTOR_INT;
3562 /* Get the mode which has this inner mode and number of units. */
3563 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3564 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3565 && GET_MODE_INNER (mode) == innermode)
3575 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3576 this may not agree with the mode that the type system has chosen for the
3577 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3578 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3581 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3586 if (orig_mode != BLKmode)
3587 tmp = gen_rtx_REG (orig_mode, regno);
3590 tmp = gen_rtx_REG (mode, regno);
3591 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3592 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3598 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3599 of this code is to classify each 8bytes of incoming argument by the register
3600 class and assign registers accordingly. */
3602 /* Return the union class of CLASS1 and CLASS2.
3603 See the x86-64 PS ABI for details. */
3605 static enum x86_64_reg_class
3606 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3608 /* Rule #1: If both classes are equal, this is the resulting class. */
3609 if (class1 == class2)
3612 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3614 if (class1 == X86_64_NO_CLASS)
3616 if (class2 == X86_64_NO_CLASS)
3619 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3620 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3621 return X86_64_MEMORY_CLASS;
3623 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3624 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3625 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3626 return X86_64_INTEGERSI_CLASS;
3627 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3628 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3629 return X86_64_INTEGER_CLASS;
3631 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3633 if (class1 == X86_64_X87_CLASS
3634 || class1 == X86_64_X87UP_CLASS
3635 || class1 == X86_64_COMPLEX_X87_CLASS
3636 || class2 == X86_64_X87_CLASS
3637 || class2 == X86_64_X87UP_CLASS
3638 || class2 == X86_64_COMPLEX_X87_CLASS)
3639 return X86_64_MEMORY_CLASS;
3641 /* Rule #6: Otherwise class SSE is used. */
3642 return X86_64_SSE_CLASS;
3645 /* Classify the argument of type TYPE and mode MODE.
3646 CLASSES will be filled by the register class used to pass each word
3647 of the operand. The number of words is returned. In case the parameter
3648 should be passed in memory, 0 is returned. As a special case for zero
3649 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3651 BIT_OFFSET is used internally for handling records and specifies offset
3652 of the offset in bits modulo 256 to avoid overflow cases.
3654 See the x86-64 PS ABI for details.
/* NOTE(review): heavily elided listing -- the static return-type line,
   local declarations (i, field, num, pos), many case labels of both
   switches (including most of the atomic-mode case labels near the end),
   break statements and closing braces are not shown between the visible
   lines.  */
3658 classify_argument (enum machine_mode mode, const_tree type,
3659 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3661 HOST_WIDE_INT bytes =
3662 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3663 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3665 /* Variable sized entities are always passed/returned in memory. */
3669 if (mode != VOIDmode
3670 && targetm.calls.must_pass_in_stack (mode, type))
3673 if (type && AGGREGATE_TYPE_P (type))
3677 enum x86_64_reg_class subclasses[MAX_CLASSES];
3679 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3683 for (i = 0; i < words; i++)
3684 classes[i] = X86_64_NO_CLASS;
3686 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3687 signalize memory class, so handle it as special case. */
3690 classes[0] = X86_64_NO_CLASS;
3694 /* Classify each field of record and merge classes. */
3695 switch (TREE_CODE (type))
3698 /* And now merge the fields of structure. */
3699 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3701 if (TREE_CODE (field) == FIELD_DECL)
3705 if (TREE_TYPE (field) == error_mark_node)
3708 /* Bitfields are always classified as integer. Handle them
3709 early, since later code would consider them to be
3710 misaligned integers. */
3711 if (DECL_BIT_FIELD (field))
/* Each 8-byte word the bitfield overlaps is merged with INTEGER.  */
3713 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3714 i < ((int_bit_position (field) + (bit_offset % 64))
3715 + tree_low_cst (DECL_SIZE (field), 0)
3718 merge_classes (X86_64_INTEGER_CLASS,
3723 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3724 TREE_TYPE (field), subclasses,
3725 (int_bit_position (field)
3726 + bit_offset) % 256);
3729 for (i = 0; i < num; i++)
3732 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3734 merge_classes (subclasses[i], classes[i + pos]);
3742 /* Arrays are handled as small records. */
3745 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3746 TREE_TYPE (type), subclasses, bit_offset);
3750 /* The partial classes are now full classes. */
3751 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3752 subclasses[0] = X86_64_SSE_CLASS;
3753 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3754 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words of the array.  */
3756 for (i = 0; i < words; i++)
3757 classes[i] = subclasses[i % num];
3762 case QUAL_UNION_TYPE:
3763 /* Unions are similar to RECORD_TYPE but offset is always 0.
3765 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3767 if (TREE_CODE (field) == FIELD_DECL)
3771 if (TREE_TYPE (field) == error_mark_node)
3774 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3775 TREE_TYPE (field), subclasses,
3779 for (i = 0; i < num; i++)
3780 classes[i] = merge_classes (subclasses[i], classes[i]);
3789 /* Final merger cleanup. */
3790 for (i = 0; i < words; i++)
3792 /* If one class is MEMORY, everything should be passed in
3794 if (classes[i] == X86_64_MEMORY_CLASS)
3797 /* The X86_64_SSEUP_CLASS should be always preceded by
3798 X86_64_SSE_CLASS. */
3799 if (classes[i] == X86_64_SSEUP_CLASS
3800 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3801 classes[i] = X86_64_SSE_CLASS;
3803 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3804 if (classes[i] == X86_64_X87UP_CLASS
3805 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3806 classes[i] = X86_64_SSE_CLASS;
3811 /* Compute alignment needed. We align all types to natural boundaries with
3812 exception of XFmode that is aligned to 64bits. */
3813 if (mode != VOIDmode && mode != BLKmode)
3815 int mode_alignment = GET_MODE_BITSIZE (mode);
3818 mode_alignment = 128;
3819 else if (mode == XCmode)
3820 mode_alignment = 256;
3821 if (COMPLEX_MODE_P (mode))
3822 mode_alignment /= 2;
3823 /* Misaligned fields are always returned in memory. */
3824 if (bit_offset % mode_alignment)
3828 /* for V1xx modes, just use the base mode */
3829 if (VECTOR_MODE_P (mode)
3830 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3831 mode = GET_MODE_INNER (mode);
3833 /* Classification of atomic types. */
/* The case labels naming the specific machine modes for each of the
   following assignments are elided from this listing.  */
3838 classes[0] = X86_64_SSE_CLASS;
3841 classes[0] = X86_64_SSE_CLASS;
3842 classes[1] = X86_64_SSEUP_CLASS;
3851 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3852 classes[0] = X86_64_INTEGERSI_CLASS;
3854 classes[0] = X86_64_INTEGER_CLASS;
3858 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3863 if (!(bit_offset % 64))
3864 classes[0] = X86_64_SSESF_CLASS;
3866 classes[0] = X86_64_SSE_CLASS;
3869 classes[0] = X86_64_SSEDF_CLASS;
3872 classes[0] = X86_64_X87_CLASS;
3873 classes[1] = X86_64_X87UP_CLASS;
3876 classes[0] = X86_64_SSE_CLASS;
3877 classes[1] = X86_64_SSEUP_CLASS;
3880 classes[0] = X86_64_SSE_CLASS;
3883 classes[0] = X86_64_SSEDF_CLASS;
3884 classes[1] = X86_64_SSEDF_CLASS;
3887 classes[0] = X86_64_COMPLEX_X87_CLASS;
3890 /* This modes is larger than 16 bytes. */
3898 classes[0] = X86_64_SSE_CLASS;
3899 classes[1] = X86_64_SSEUP_CLASS;
3905 classes[0] = X86_64_SSE_CLASS;
3911 gcc_assert (VECTOR_MODE_P (mode));
3916 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3918 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3919 classes[0] = X86_64_INTEGERSI_CLASS;
3921 classes[0] = X86_64_INTEGER_CLASS;
3922 classes[1] = X86_64_INTEGER_CLASS;
3923 return 1 + (bytes > 8);
3927 /* Examine the argument and return set number of register required in each
3928 class. Return 0 iff parameter should be passed in memory. */
3930 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3931 int *int_nregs, int *sse_nregs)
3933 enum x86_64_reg_class regclass[MAX_CLASSES];
3934 int n = classify_argument (mode, type, regclass, 0);
3940 for (n--; n >= 0; n--)
3941 switch (regclass[n])
3943 case X86_64_INTEGER_CLASS:
3944 case X86_64_INTEGERSI_CLASS:
3947 case X86_64_SSE_CLASS:
3948 case X86_64_SSESF_CLASS:
3949 case X86_64_SSEDF_CLASS:
3952 case X86_64_NO_CLASS:
3953 case X86_64_SSEUP_CLASS:
3955 case X86_64_X87_CLASS:
3956 case X86_64_X87UP_CLASS:
3960 case X86_64_COMPLEX_X87_CLASS:
3961 return in_return ? 2 : 0;
3962 case X86_64_MEMORY_CLASS:
3968 /* Construct container for the argument used by GCC interface. See
3969 FUNCTION_ARG for the detailed description. */
/* NOTE(review): heavily elided listing -- the return type line, local
   declarations (n, i, nexps, pos, ret), `return NULL;` statements after
   the memory-class checks, several case labels, sse_regno increments and
   break statements are not shown between the visible lines.  */
3972 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3973 const_tree type, int in_return, int nintregs, int nsseregs,
3974 const int *intreg, int sse_regno)
3976 /* The following variables hold the static issued_error state. */
3977 static bool issued_sse_arg_error;
3978 static bool issued_sse_ret_error;
3979 static bool issued_x87_ret_error;
3981 enum machine_mode tmpmode;
3983 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3984 enum x86_64_reg_class regclass[MAX_CLASSES];
3988 int needed_sseregs, needed_intregs;
3989 rtx exp[MAX_CLASSES];
3992 n = classify_argument (mode, type, regclass, 0);
3995 if (!examine_argument (mode, type, in_return, &needed_intregs,
3998 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4001 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4002 some less clueful developer tries to use floating-point anyway. */
4003 if (needed_sseregs && !TARGET_SSE)
/* Each diagnostic is issued at most once per compilation, tracked by
   the static issued_* flags above.  */
4007 if (!issued_sse_ret_error)
4009 error ("SSE register return with SSE disabled");
4010 issued_sse_ret_error = true;
4013 else if (!issued_sse_arg_error)
4015 error ("SSE register argument with SSE disabled");
4016 issued_sse_arg_error = true;
4021 /* Likewise, error if the ABI requires us to return values in the
4022 x87 registers and the user specified -mno-80387. */
4023 if (!TARGET_80387 && in_return)
4024 for (i = 0; i < n; i++)
4025 if (regclass[i] == X86_64_X87_CLASS
4026 || regclass[i] == X86_64_X87UP_CLASS
4027 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4029 if (!issued_x87_ret_error)
4031 error ("x87 register return with x87 disabled")
4032 issued_x87_ret_error = true;
4037 /* First construct simple cases. Avoid SCmode, since we want to use
4038 single register to pass this type. */
4039 if (n == 1 && mode != SCmode)
4040 switch (regclass[0])
4042 case X86_64_INTEGER_CLASS:
4043 case X86_64_INTEGERSI_CLASS:
4044 return gen_rtx_REG (mode, intreg[0]);
4045 case X86_64_SSE_CLASS:
4046 case X86_64_SSESF_CLASS:
4047 case X86_64_SSEDF_CLASS:
4048 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4049 case X86_64_X87_CLASS:
4050 case X86_64_COMPLEX_X87_CLASS:
4051 return gen_rtx_REG (mode, FIRST_STACK_REG);
4052 case X86_64_NO_CLASS:
4053 /* Zero sized array, struct or class. */
/* Two-word combinations that still fit a single hard register.  */
4058 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4059 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4060 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4063 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4064 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4065 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4066 && regclass[1] == X86_64_INTEGER_CLASS
4067 && (mode == CDImode || mode == TImode || mode == TFmode)
4068 && intreg[0] + 1 == intreg[1])
4069 return gen_rtx_REG (mode, intreg[0]);
4071 /* Otherwise figure out the entries of the PARALLEL. */
4072 for (i = 0; i < n; i++)
4074 switch (regclass[i])
4076 case X86_64_NO_CLASS:
4078 case X86_64_INTEGER_CLASS:
4079 case X86_64_INTEGERSI_CLASS:
4080 /* Merge TImodes on aligned occasions here too. */
4081 if (i * 8 + 8 > bytes)
4082 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4083 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4087 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4088 if (tmpmode == BLKmode)
4090 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4091 gen_rtx_REG (tmpmode, *intreg),
4095 case X86_64_SSESF_CLASS:
4096 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4097 gen_rtx_REG (SFmode,
4098 SSE_REGNO (sse_regno)),
4102 case X86_64_SSEDF_CLASS:
4103 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4104 gen_rtx_REG (DFmode,
4105 SSE_REGNO (sse_regno)),
4109 case X86_64_SSE_CLASS:
4110 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4114 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4115 gen_rtx_REG (tmpmode,
4116 SSE_REGNO (sse_regno)),
/* A TImode entry consumed two 8-byte words, so skip the SSEUP slot.  */
4118 if (tmpmode == TImode)
4127 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into the final PARALLEL.  */
4131 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4132 for (i = 0; i < nexps; i++)
4133 XVECEXP (ret, 0, i) = exp [i];
4137 /* Update the data in CUM to advance over an argument of mode MODE
4138 and data type TYPE. (TYPE is null for libcalls where that information
4139 may not be available.) */
/* NOTE(review): this listing is gapped — braces and the mode dispatch
   between original lines 4143..4159 et al. are not visible; comments
   below describe only the statements shown.  */
4142 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4143 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Consume WORDS general registers for an integer-class argument.  */
4159 cum->words += words;
4160 cum->nregs -= words;
4161 cum->regno += words;
/* nregs can go negative here; presumably reset below when exhausted
   (reset code not visible in this view).  */
4163 if (cum->nregs <= 0)
/* float_in_sse thresholds: DFmode needs level >= 2, SFmode >= 1
   (exact modes guarded here are not visible — TODO confirm).  */
4171 if (cum->float_in_sse < 2)
4174 if (cum->float_in_sse < 1)
/* SSE vector args: only non-aggregates advance the SSE counters.  */
4185 if (!type || !AGGREGATE_TYPE_P (type))
4187 cum->sse_words += words;
4188 cum->sse_nregs -= 1;
4189 cum->sse_regno += 1;
4190 if (cum->sse_nregs <= 0)
/* MMX vector args: mirror of the SSE bookkeeping above.  */
4202 if (!type || !AGGREGATE_TYPE_P (type))
4204 cum->mmx_words += words;
4205 cum->mmx_nregs -= 1;
4206 cum->mmx_regno += 1;
4207 if (cum->mmx_nregs <= 0)
/* Advance CUM past an argument in the x86-64 SysV ABI.  If the argument
   cannot be classified into registers it is passed in memory and only
   the stack word count advances.  (Listing gapped; braces omitted.)  */
4218 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4219 tree type, HOST_WIDE_INT words)
4221 int int_nregs, sse_nregs;
/* Unclassifiable argument: passed on the stack.  */
4223 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4224 cum->words += words;
/* Fits entirely in the remaining integer + SSE registers: consume them.  */
4225 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4227 cum->nregs -= int_nregs;
4228 cum->sse_nregs -= sse_nregs;
4229 cum->regno += int_nregs;
4230 cum->sse_regno += sse_nregs;
/* Otherwise (partial fit) it goes on the stack instead.  */
4233 cum->words += words;
/* Advance CUM past an argument in the Microsoft x64 ABI.  Arguments
   larger than 8 bytes are passed by hidden reference, so only sizes
   1/2/4/8 reach this point directly.  */
4237 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4238 HOST_WIDE_INT words)
4240 /* Otherwise, this should be passed indirect. */
4241 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4243 cum->words += words;
/* Top-level FUNCTION_ARG_ADVANCE dispatcher: compute the argument's
   size in bytes and words, then delegate to the ABI-specific helper
   (MS x64, SysV x86-64, or 32-bit).  */
4252 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4253 tree type, int named ATTRIBUTE_UNUSED)
4255 HOST_WIDE_INT bytes, words;
/* BLKmode carries no size; take it from TYPE instead.  */
4257 if (mode == BLKmode)
4258 bytes = int_size_in_bytes (type)
4260 bytes = GET_MODE_SIZE (mode);
4261 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Re-derive a natural (vector) mode for the type; the guard for this
   call is not visible in this listing — TODO confirm condition.  */
4264 mode = type_natural_mode (type);
4266 if (TARGET_64BIT_MS_ABI)
4267 function_arg_advance_ms_64 (cum, bytes, words);
4268 else if (TARGET_64BIT)
4269 function_arg_advance_64 (cum, mode, type, words);
4271 function_arg_advance_32 (cum, mode, type, bytes, words);
4274 /* Define where to put the arguments to a function.
4275 Value is zero to push the argument on the stack,
4276 or a hard register in which to store the argument.
4278 MODE is the argument's machine mode.
4279 TYPE is the data type of the argument (as a tree).
4280 This is null for libcalls where that information may not be available.
4282 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4283 the preceding args and about the function being called.
4284 NAMED is nonzero if this argument is a named parameter
4285 (otherwise it is an extra parameter matching an ellipsis). */
/* Pick the register (or NULL for stack) for a 32-bit argument.
   Returns an rtx REG/PARALLEL, or falls through to stack passing.
   (Listing gapped: several guards and the mode switch are missing.)  */
4288 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4289 enum machine_mode orig_mode, tree type,
4290 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning latches shared across all calls.  */
4292 static bool warnedsse, warnedmmx;
4294 /* Avoid the AL settings for the Unix64 ABI. */
4295 if (mode == VOIDmode)
/* Integer-class argument that still fits in the regparm registers.  */
4311 if (words <= cum->nregs)
4313 int regno = cum->regno;
4315 /* Fastcall allocates the first two DWORD (SImode) or
4316 smaller arguments to ECX and EDX if it isn't an
4322 || (type && AGGREGATE_TYPE_P (type)))
4325 /* ECX not EAX is the first allocated register. */
4326 if (regno == AX_REG)
4329 return gen_rtx_REG (mode, regno);
/* sseregparm levels: 2 covers DFmode, 1 covers SFmode — the guarded
   modes are not visible here; TODO confirm.  */
4334 if (cum->float_in_sse < 2)
4337 if (cum->float_in_sse < 1)
/* SSE vector argument (non-aggregate only): warn once if SSE is off,
   then hand out the next XMM register.  */
4347 if (!type || !AGGREGATE_TYPE_P (type))
4349 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4352 warning (0, "SSE vector argument without SSE enabled "
4356 return gen_reg_or_parallel (mode, orig_mode,
4357 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector argument: same pattern with the MM registers.  */
4365 if (!type || !AGGREGATE_TYPE_P (type))
4367 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4370 warning (0, "MMX vector argument without MMX enabled "
4374 return gen_reg_or_parallel (mode, orig_mode,
4375 cum->mmx_regno + FIRST_MMX_REG);
/* Pick the register(s) for an argument in the x86-64 SysV ABI by
   delegating to construct_container.  */
4384 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4385 enum machine_mode orig_mode, tree type)
4387 /* Handle a hidden AL argument containing number of registers
4388 for varargs x86-64 functions. */
4389 if (mode == VOIDmode)
/* For prototyped calls sse_nregs < 0 marks "no vararg SSE count
   needed"; the alternative arm is not visible in this listing.  */
4390 return GEN_INT (cum->maybe_vaarg
4391 ? (cum->sse_nregs < 0
4396 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4398 &x86_64_int_parameter_registers [cum->regno],
/* Pick the register for an argument in the Microsoft x64 ABI.  Each
   argument slot maps to one fixed integer register; FP scalars use
   the SSE register of the same slot index.  */
4403 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4404 enum machine_mode orig_mode, int named)
4408 /* Avoid the AL settings for the Unix64 ABI. */
4409 if (mode == VOIDmode)
4412 /* If we've run out of registers, it goes on the stack. */
4413 if (cum->nregs == 0)
4416 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4418 /* Only floating point modes are passed in anything but integer regs. */
4419 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4422 regno = cum->regno + FIRST_SSE_REG;
4427 /* Unnamed floating parameters are passed in both the
4428 SSE and integer registers. */
/* Build a two-element PARALLEL so the value is available in both
   places for the callee's va_arg machinery.  */
4429 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4430 t2 = gen_rtx_REG (mode, regno);
4431 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4432 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4433 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4437 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG dispatcher: size the argument, normalize
   vector types to their natural mode, and delegate per ABI.  */
4441 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4442 tree type, int named)
4444 enum machine_mode mode = omode;
4445 HOST_WIDE_INT bytes, words;
4447 if (mode == BLKmode)
4448 bytes = int_size_in_bytes (type);
4450 bytes = GET_MODE_SIZE (mode);
4451 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4453 /* To simplify the code below, represent vector types with a vector mode
4454 even if MMX/SSE are not active. */
4455 if (type && TREE_CODE (type) == VECTOR_TYPE)
4456 mode = type_natural_mode (type);
4458 if (TARGET_64BIT_MS_ABI)
4459 return function_arg_ms_64 (cum, mode, omode, named);
4460 else if (TARGET_64BIT)
4461 return function_arg_64 (cum, mode, omode, type);
4463 return function_arg_32 (cum, mode, omode, type, bytes, words);
4466 /* A C expression that indicates when an argument must be passed by
4467 reference. If nonzero for an argument, a copy of that argument is
4468 made in memory and a pointer to the argument is passed instead of
4469 the argument itself. The pointer is passed in whatever way is
4470 appropriate for passing a pointer to that type. */
/* Implement TARGET_PASS_BY_REFERENCE: decide whether an argument of
   MODE/TYPE must be passed as a pointer to a caller-made copy.  */
4473 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4474 enum machine_mode mode ATTRIBUTE_UNUSED,
4475 const_tree type, bool named ATTRIBUTE_UNUSED)
/* Microsoft x64 rules first.  */
4477 if (TARGET_64BIT_MS_ABI)
4481 /* Arrays are passed by reference. */
4482 if (TREE_CODE (type) == ARRAY_TYPE)
4485 if (AGGREGATE_TYPE_P (type))
4487 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4488 are passed by reference. */
/* exact_log2 in [0,3] <=> size is 1, 2, 4 or 8 bytes.  */
4489 int el2 = exact_log2 (int_size_in_bytes (type));
4490 return !(el2 >= 0 && el2 <= 3);
4494 /* __m128 is passed by reference. */
4495 /* ??? How to handle complex? For now treat them as structs,
4496 and pass them by reference if they're too large. */
4497 if (GET_MODE_SIZE (mode) > 8)
/* SysV x86-64: variable-size types (int_size_in_bytes == -1) are
   passed by reference.  */
4500 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4506 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4507 ABI. Only called if TARGET_SSE. */
/* Recurses through aggregates and array element types looking for an
   SSE vector member that demands 16-byte alignment.  */
4509 contains_128bit_aligned_vector_p (tree type)
4511 enum machine_mode mode = TYPE_MODE (type);
/* An SSE-mode type counts unless the user explicitly lowered its
   alignment (user-align with align <= 128 is respected).  */
4512 if (SSE_REG_MODE_P (mode)
4513 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* Anything already aligned below 128 bits cannot contain one.  */
4515 if (TYPE_ALIGN (type) < 128)
4518 if (AGGREGATE_TYPE_P (type))
4520 /* Walk the aggregates recursively. */
4521 switch (TREE_CODE (type))
4525 case QUAL_UNION_TYPE:
4529 /* Walk all the structure fields. */
4530 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4532 if (TREE_CODE (field) == FIELD_DECL
4533 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4540 /* Just for use if some languages passes arrays by value. */
4541 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4552 /* Gives the alignment boundary, in bits, of an argument with the
4553 specified mode and type. */
4556 ix86_function_arg_boundary (enum machine_mode mode, tree type)
/* Prefer the type's alignment when a type is available; otherwise use
   the mode's natural alignment, clamped up to PARM_BOUNDARY.  */
4560 align = TYPE_ALIGN (type);
4562 align = GET_MODE_ALIGNMENT (mode);
4563 if (align < PARM_BOUNDARY)
4564 align = PARM_BOUNDARY;
4567 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4568 make an exception for SSE modes since these require 128bit
4571 The handling here differs from field_alignment. ICC aligns MMX
4572 arguments to 4 byte boundaries, while structure fields are aligned
4573 to 8 byte boundaries. */
/* The fall-back below demotes everything except SSE-mode scalars and
   aggregates that actually contain a 128-bit-aligned vector.  */
4575 align = PARM_BOUNDARY;
4578 if (!SSE_REG_MODE_P (mode))
4579 align = PARM_BOUNDARY;
4583 if (!contains_128bit_aligned_vector_p (type))
4584 align = PARM_BOUNDARY;
4592 /* Return true if N is a possible register number of function value. */
4595 ix86_function_value_regno_p (int regno)
/* Listing gapped: the switch head and the AX/SSE cases are not
   visible; the visible cases are x87 and (presumably) MMX returns.  */
4602 case FIRST_FLOAT_REG:
/* MS x64 never returns in x87 registers.  */
4603 if (TARGET_64BIT_MS_ABI)
4605 return TARGET_FLOAT_RETURNS_IN_80387;
4611 if (TARGET_MACHO || TARGET_64BIT)
4619 /* Define how to find the value returned by a function.
4620 VALTYPE is the data type of the value (as a tree).
4621 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4622 otherwise, FUNC is 0. */
/* 32-bit return-register selection.  Default regno (AX) is assigned in
   a line not visible in this listing.  */
4625 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4626 const_tree fntype, const_tree fn)
4630 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4631 we normally prevent this case when mmx is not available. However
4632 some ABIs may require the result to be returned like DImode. */
4633 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4634 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4636 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4637 we prevent this case when sse is not available. However some ABIs
4638 may require the result to be returned like integer TImode. */
4639 else if (mode == TImode
4640 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4641 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4643 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4644 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4645 regno = FIRST_FLOAT_REG;
4647 /* Most things go in %eax. */
4650 /* Override FP return register with %xmm0 for local functions when
4651 SSE math is enabled or for functions with sseregparm attribute. */
4652 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4654 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4655 if ((sse_level >= 1 && mode == SFmode)
4656 || (sse_level == 2 && mode == DFmode))
4657 regno = FIRST_SSE_REG;
4660 return gen_rtx_REG (orig_mode, regno);
/* x86-64 SysV return-value selection; real work is done by
   construct_container.  (Listing gapped: the libcall mode switch
   between 4670 and 4689 is mostly missing.)  */
4664 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4669 /* Handle libcalls, which don't provide a type node. */
4670 if (valtype == NULL)
4682 return gen_rtx_REG (mode, FIRST_SSE_REG);
4685 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4689 return gen_rtx_REG (mode, AX_REG);
4693 ret = construct_container (mode, orig_mode, valtype, 1,
4694 REGPARM_MAX, SSE_REGPARM_MAX,
4695 x86_64_int_return_registers, 0);
4697 /* For zero sized structures, construct_container returns NULL, but we
4698 need to keep rest of compiler happy by returning meaningful value. */
4700 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Microsoft x64 return-value selection: RAX by default, XMM0 for FP
   scalars and 16-byte vectors (SSE guard not visible here).  */
4706 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4708 unsigned int regno = AX_REG;
4712 if (mode == SFmode || mode == DFmode)
4713 regno = FIRST_SSE_REG;
/* NOTE(review): `||` here accepts any 16-byte mode, vector or not —
   matches TImode-style returns; verify against upstream intent.  */
4714 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4715 regno = FIRST_SSE_REG;
4718 return gen_rtx_REG (orig_mode, regno);
/* Common helper for ix86_function_value and ix86_libcall_value:
   resolve FNTYPE_OR_DECL into (fn, fntype) and dispatch per ABI.  */
4722 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4723 enum machine_mode orig_mode, enum machine_mode mode)
4725 const_tree fn, fntype;
4728 if (fntype_or_decl && DECL_P (fntype_or_decl))
4729 fn = fntype_or_decl;
4730 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4732 if (TARGET_64BIT_MS_ABI)
4733 return function_value_ms_64 (orig_mode, mode);
4734 else if (TARGET_64BIT)
4735 return function_value_64 (orig_mode, mode, valtype);
4737 return function_value_32 (orig_mode, mode, fntype, fn);
/* Implement FUNCTION_VALUE: normalize the return type's mode, then
   defer to the shared helper.  */
4741 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4742 bool outgoing ATTRIBUTE_UNUSED)
4744 enum machine_mode mode, orig_mode;
4746 orig_mode = TYPE_MODE (valtype);
4747 mode = type_natural_mode (valtype);
4748 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Implement LIBCALL_VALUE: libcalls have no type tree, so pass NULLs.  */
4752 ix86_libcall_value (enum machine_mode mode)
4754 return ix86_function_value_1 (NULL, NULL, mode, mode);
4757 /* Return true iff type is returned in memory. */
/* 32-bit rules.  (Listing gapped: the non-vector tail handling x87 /
   long long / small aggregates is not visible.)  */
4760 return_in_memory_32 (const_tree type, enum machine_mode mode)
4764 if (mode == BLKmode)
4767 size = int_size_in_bytes (type);
/* Some ABIs (e.g. win32) return small aggregates in registers.  */
4769 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4772 if (VECTOR_MODE_P (mode) || mode == TImode)
4774 /* User-created vectors small enough to fit in EAX. */
4778 /* MMX/3dNow values are returned in MM0,
4779 except when it doesn't exits. */
4781 return (TARGET_MMX ? 0 : 1);
4783 /* SSE values are returned in XMM0, except when it doesn't exist. */
4785 return (TARGET_SSE ? 0 : 1);
/* x86-64 SysV: in memory iff the value cannot be classified into
   registers at all.  */
4800 return_in_memory_64 (const_tree type, enum machine_mode mode)
4802 int needed_intregs, needed_sseregs;
4803 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft x64: in registers only for __m128-style 16-byte vectors
   and for plain values of size 1/2/4/8; everything else (including
   complex) goes through memory.  */
4807 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4809 HOST_WIDE_INT size = int_size_in_bytes (type);
4811 /* __m128 and friends are returned in xmm0. */
4812 if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
4815 /* Otherwise, the size must be exactly in [1248]. But not for complex. */
4816 return (size != 1 && size != 2 && size != 4 && size != 8)
4817 || COMPLEX_MODE_P (mode);
/* Implement RETURN_IN_MEMORY: dispatch on ABI after computing the
   type's natural mode.  */
4821 ix86_return_in_memory (const_tree type)
4823 const enum machine_mode mode = type_natural_mode (type);
4825 if (TARGET_64BIT_MS_ABI)
4826 return return_in_memory_ms_64 (type, mode);
4827 else if (TARGET_64BIT)
4828 return return_in_memory_64 (type, mode);
4830 return return_in_memory_32 (type, mode);
4833 /* Return false iff TYPE is returned in memory. This version is used
4834 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4835 but differs notably in that when MMX is available, 8-byte vectors
4836 are returned in memory, rather than in MMX registers. */
/* NOTE(review): despite the "false iff" wording above, the generic
   hook returns true for memory; the truth sense here should be
   verified against the hook's callers.  */
4839 ix86_sol10_return_in_memory (const_tree type)
4842 enum machine_mode mode = type_natural_mode (type);
4845 return return_in_memory_64 (type, mode);
4847 if (mode == BLKmode)
4850 size = int_size_in_bytes (type);
4852 if (VECTOR_MODE_P (mode))
4854 /* Return in memory only if MMX registers *are* available. This
4855 seems backwards, but it is consistent with the existing
/* TImode and XFmode scalars get their own size thresholds here
   (values not visible in this listing).  */
4862 else if (mode == TImode)
4864 else if (mode == XFmode)
4870 /* When returning SSE vector types, we have a choice of either
4871 (1) being abi incompatible with a -march switch, or
4872 (2) generating an error.
4873 Given no good solution, I think the safest thing is one warning.
4874 The user won't be able to use -Werror, but....
4876 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4877 called in response to actually generating a caller or callee that
4878 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4879 via aggregate_value_p for general type probing from tree-ssa. */
/* Side effect only: emits one-time warnings for SSE/MMX vector
   returns with the feature disabled; the actual rtx returned is in
   lines not visible here.  */
4882 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4884 static bool warnedsse, warnedmmx;
4886 if (!TARGET_64BIT && type)
4888 /* Look at the return type of the function, not the function type. */
4889 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4891 if (!TARGET_SSE && !warnedsse)
4894 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4897 warning (0, "SSE vector return without SSE enabled "
4902 if (!TARGET_MMX && !warnedmmx)
4904 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4907 warning (0, "MMX vector return without MMX enabled "
4917 /* Create the va_list data type. */
/* On 32-bit and MS x64 a va_list is just a char*; on SysV x86-64 it
   is the four-field __va_list_tag record wrapped in a 1-element
   array (so it decays to a pointer when passed, per the psABI).  */
4920 ix86_build_builtin_va_list (void)
4922 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4924 /* For i386 we use plain pointer to argument area. */
4925 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4926 return build_pointer_type (char_type_node);
4928 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4929 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
/* gp_offset / fp_offset: byte offsets into the register save area.  */
4931 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4932 unsigned_type_node);
4933 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4934 unsigned_type_node);
4935 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4937 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Let the middle end track which counters a function actually uses.  */
4940 va_list_gpr_counter_field = f_gpr;
4941 va_list_fpr_counter_field = f_fpr;
4943 DECL_FIELD_CONTEXT (f_gpr) = record;
4944 DECL_FIELD_CONTEXT (f_fpr) = record;
4945 DECL_FIELD_CONTEXT (f_ovf) = record;
4946 DECL_FIELD_CONTEXT (f_sav) = record;
4948 TREE_CHAIN (record) = type_decl;
4949 TYPE_NAME (record) = type_decl;
4950 TYPE_FIELDS (record) = f_gpr;
4951 TREE_CHAIN (f_gpr) = f_fpr;
4952 TREE_CHAIN (f_fpr) = f_ovf;
4953 TREE_CHAIN (f_ovf) = f_sav;
4955 layout_type (record);
4957 /* The correct type is an array type of one element. */
4958 return build_array_type (record, build_index_type (size_zero_node));
4961 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* SysV x86-64: spill the unnamed-argument registers (GPRs directly,
   SSE regs via the computed-jump sse_prologue_save template) into the
   register save area so va_arg can read them later.  */
4964 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
/* Nothing to do if va_arg never touches either register class.  */
4974 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4977 /* Indicate to allocate space on the stack for varargs save area. */
4978 ix86_save_varrargs_registers = 1;
4979 /* We need 16-byte stack alignment to save SSE registers. If user
4980 asked for lower preferred_stack_boundary, lets just hope that he knows
4981 what he is doing and won't varargs SSE values.
4983 We also may end up assuming that only 64bit values are stored in SSE
4984 register let some floating point program work. */
4985 if (ix86_preferred_stack_boundary >= 128)
4986 cfun->stack_alignment_needed = 128;
4988 save_area = frame_pointer_rtx;
4989 set = get_varargs_alias_set ();
/* Store each unnamed GPR into its slot in the save area.  Loop bound
   is clamped by va_list_gpr_size (full condition partly off-view).  */
4991 for (i = cum->regno;
4993 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4996 mem = gen_rtx_MEM (Pmode,
4997 plus_constant (save_area, i * UNITS_PER_WORD));
4998 MEM_NOTRAP_P (mem) = 1;
4999 set_mem_alias_set (mem, set);
5000 emit_move_insn (mem, gen_rtx_REG (Pmode,
5001 x86_64_int_parameter_registers[i]));
5004 if (cum->sse_nregs && cfun->va_list_fpr_size)
5006 /* Now emit code to save SSE registers. The AX parameter contains number
5007 of SSE parameter registers used to call this function. We use
5008 sse_prologue_save insn template that produces computed jump across
5009 SSE saves. We need some preparation work to get this working. */
5011 label = gen_label_rtx ();
5012 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5014 /* Compute address to jump to :
5015 label - 5*eax + nnamed_sse_arguments*5 */
5016 tmp_reg = gen_reg_rtx (Pmode);
5017 nsse_reg = gen_reg_rtx (Pmode);
/* AL holds the number of vararg SSE registers actually used.  */
5018 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5019 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5020 gen_rtx_MULT (Pmode, nsse_reg,
5025 gen_rtx_CONST (DImode,
5026 gen_rtx_PLUS (DImode,
5028 GEN_INT (cum->sse_regno * 4))));
5030 emit_move_insn (nsse_reg, label_ref);
5031 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5033 /* Compute address of memory block we save into. We always use pointer
5034 pointing 127 bytes after first byte to store - this is needed to keep
5035 instruction size limited by 4 bytes. */
5036 tmp_reg = gen_reg_rtx (Pmode);
5037 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5038 plus_constant (save_area,
5039 8 * REGPARM_MAX + 127)));
5040 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5041 MEM_NOTRAP_P (mem) = 1;
5042 set_mem_alias_set (mem, set);
5043 set_mem_align (mem, BITS_PER_WORD);
5045 /* And finally do the dirty job! */
5046 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5047 GEN_INT (cum->sse_regno), label));
/* Microsoft x64: spill the remaining named-register slots into their
   home locations in the caller-allocated shadow area so va_arg can
   walk them as a contiguous stack.  */
5052 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5054 alias_set_type set = get_varargs_alias_set ();
5057 for (i = cum->regno; i < REGPARM_MAX; i++)
5061 mem = gen_rtx_MEM (Pmode,
5062 plus_constant (virtual_incoming_args_rtx,
5063 i * UNITS_PER_WORD));
5064 MEM_NOTRAP_P (mem) = 1;
5065 set_mem_alias_set (mem, set);
5067 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5068 emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: skip the last named
   argument for stdarg functions, then delegate per ABI.  */
5073 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5074 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5077 CUMULATIVE_ARGS next_cum;
5080 /* This argument doesn't appear to be used anymore. Which is good,
5081 because the old code here didn't suppress rtl generation. */
5082 gcc_assert (!no_rtl);
/* 32-bit early return presumably occurs in the lines elided here.  */
5087 fntype = TREE_TYPE (current_function_decl);
5089 /* For varargs, we do not want to skip the dummy va_dcl argument.
5090 For stdargs, we do want to skip the last named argument. */
5092 if (stdarg_p (fntype))
5093 function_arg_advance (&next_cum, mode, type, 1);
5095 if (TARGET_64BIT_MS_ABI)
5096 setup_incoming_varargs_ms_64 (&next_cum);
5098 setup_incoming_varargs_64 (&next_cum);
5101 /* Implement va_start. */
/* SysV x86-64 only: initialize the four __va_list_tag fields from the
   counts recorded in current_function_args_info.  */
5104 ix86_va_start (tree valist, rtx nextarg)
5106 HOST_WIDE_INT words, n_gpr, n_fpr;
5107 tree f_gpr, f_fpr, f_ovf, f_sav;
5108 tree gpr, fpr, ovf, sav, t;
5111 /* Only 64bit target needs something special. */
5112 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5114 std_expand_builtin_va_start (valist, nextarg);
5118 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5119 f_fpr = TREE_CHAIN (f_gpr);
5120 f_ovf = TREE_CHAIN (f_fpr);
5121 f_sav = TREE_CHAIN (f_ovf);
5123 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5124 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5125 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5126 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5127 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5129 /* Count number of gp and fp argument registers used. */
5130 words = current_function_args_info.words;
5131 n_gpr = current_function_args_info.regno;
5132 n_fpr = current_function_args_info.sse_regno;
/* gp_offset = n_gpr * 8 (8 bytes per GPR slot).  */
5134 if (cfun->va_list_gpr_size)
5136 type = TREE_TYPE (gpr);
5137 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5138 build_int_cst (type, n_gpr * 8));
5139 TREE_SIDE_EFFECTS (t) = 1;
5140 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts past the GPR area: n_fpr * 16 + 8 * REGPARM_MAX.  */
5143 if (cfun->va_list_fpr_size)
5145 type = TREE_TYPE (fpr);
5146 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5147 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5148 TREE_SIDE_EFFECTS (t) = 1;
5149 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5152 /* Find the overflow area. */
5153 type = TREE_TYPE (ovf);
5154 t = make_tree (type, virtual_incoming_args_rtx);
5156 t = build2 (POINTER_PLUS_EXPR, type, t,
5157 size_int (words * UNITS_PER_WORD));
5158 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5159 TREE_SIDE_EFFECTS (t) = 1;
5160 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5162 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5164 /* Find the register save area.
5165 Prologue of the function save it right above stack frame. */
5166 type = TREE_TYPE (sav);
5167 t = make_tree (type, frame_pointer_rtx);
5168 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5169 TREE_SIDE_EFFECTS (t) = 1;
5170 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5174 /* Implement va_arg. */
/* Gimplify one va_arg read for SysV x86-64: try the register save
   area first (guarded by gp_offset/fp_offset limit checks), fall back
   to the overflow area.  Several closing braces and else-arms are
   elided from this listing.  */
5177 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5179 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5180 tree f_gpr, f_fpr, f_ovf, f_sav;
5181 tree gpr, fpr, ovf, sav, t;
5183 tree lab_false, lab_over = NULL_TREE;
5188 enum machine_mode nat_mode;
5190 /* Only 64bit target needs something special. */
5191 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5192 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5194 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5195 f_fpr = TREE_CHAIN (f_gpr);
5196 f_ovf = TREE_CHAIN (f_fpr);
5197 f_sav = TREE_CHAIN (f_ovf);
5199 valist = build_va_arg_indirect_ref (valist);
5200 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5201 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5202 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5203 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* By-reference arguments are fetched as pointers.  */
5205 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5207 type = build_pointer_type (type);
5208 size = int_size_in_bytes (type);
5209 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5211 nat_mode = type_natural_mode (type);
5212 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5213 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5215 /* Pull the value out of the saved registers. */
5217 addr = create_tmp_var (ptr_type_node, "addr");
5218 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5222 int needed_intregs, needed_sseregs;
5224 tree int_addr, sse_addr;
5226 lab_false = create_artificial_label ();
5227 lab_over = create_artificial_label ();
5229 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary copy is needed when the aggregate's pieces are not laid
   out contiguously in the save area.  */
5231 need_temp = (!REG_P (container)
5232 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5233 || TYPE_ALIGN (type) > 128));
5235 /* In case we are passing structure, verify that it is consecutive block
5236 on the register save area. If not we need to do moves. */
5237 if (!need_temp && !REG_P (container))
5239 /* Verify that all registers are strictly consecutive */
5240 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE pieces must sit at 16-byte strides from XMM0's slot.  */
5244 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5246 rtx slot = XVECEXP (container, 0, i);
5247 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5248 || INTVAL (XEXP (slot, 1)) != i * 16)
/* Integer pieces must sit at 8-byte strides.  */
5256 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5258 rtx slot = XVECEXP (container, 0, i);
5259 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5260 || INTVAL (XEXP (slot, 1)) != i * 8)
5272 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5273 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5274 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5275 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5278 /* First ensure that we fit completely in registers. */
/* if (gpr >= (REGPARM_MAX - needed + 1) * 8) goto lab_false;  */
5281 t = build_int_cst (TREE_TYPE (gpr),
5282 (REGPARM_MAX - needed_intregs + 1) * 8);
5283 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5284 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5285 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5286 gimplify_and_add (t, pre_p);
/* Same limit check for the FP save area (16 bytes per slot).  */
5290 t = build_int_cst (TREE_TYPE (fpr),
5291 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5293 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5294 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5295 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5296 gimplify_and_add (t, pre_p);
5299 /* Compute index to start of area used for integer regs. */
5302 /* int_addr = gpr + sav; */
5303 t = fold_convert (sizetype, gpr);
5304 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5305 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5306 gimplify_and_add (t, pre_p);
5310 /* sse_addr = fpr + sav; */
5311 t = fold_convert (sizetype, fpr);
5312 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5313 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5314 gimplify_and_add (t, pre_p);
/* Non-contiguous case: copy each register piece into a stack temp.  */
5319 tree temp = create_tmp_var (type, "va_arg_tmp");
5322 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5323 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5324 gimplify_and_add (t, pre_p);
5326 for (i = 0; i < XVECLEN (container, 0); i++)
5328 rtx slot = XVECEXP (container, 0, i);
5329 rtx reg = XEXP (slot, 0);
5330 enum machine_mode mode = GET_MODE (reg);
5331 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5332 tree addr_type = build_pointer_type (piece_type);
5335 tree dest_addr, dest;
5337 if (SSE_REGNO_P (REGNO (reg)))
5339 src_addr = sse_addr;
5340 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5344 src_addr = int_addr;
5345 src_offset = REGNO (reg) * 8;
5347 src_addr = fold_convert (addr_type, src_addr);
5348 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5349 size_int (src_offset));
5350 src = build_va_arg_indirect_ref (src_addr);
5352 dest_addr = fold_convert (addr_type, addr);
5353 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5354 size_int (INTVAL (XEXP (slot, 1))));
5355 dest = build_va_arg_indirect_ref (dest_addr);
5357 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5358 gimplify_and_add (t, pre_p);
/* Bump the offsets past the registers just consumed.  */
5364 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5365 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5366 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5367 gimplify_and_add (t, pre_p);
5371 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5372 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5373 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5374 gimplify_and_add (t, pre_p);
5377 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5378 gimplify_and_add (t, pre_p);
5380 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5381 append_to_statement_list (t, pre_p);
5384 /* ... otherwise out of the overflow area. */
5386 /* Care for on-stack alignment if needed. */
5387 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5388 || integer_zerop (TYPE_SIZE (type)))
/* ovf = (ovf + align - 1) & -align, done in sizetype arithmetic.  */
5392 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5393 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5394 size_int (align - 1));
5395 t = fold_convert (sizetype, t);
5396 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5398 t = fold_convert (TREE_TYPE (ovf), t);
5400 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5402 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5403 gimplify_and_add (t2, pre_p);
5405 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5406 size_int (rsize * UNITS_PER_WORD));
5407 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5408 gimplify_and_add (t, pre_p);
5412 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5413 append_to_statement_list (t, pre_p);
5416 ptrtype = build_pointer_type (type);
5417 addr = fold_convert (ptrtype, addr);
/* Extra dereference for by-reference arguments.  */
5420 addr = build_va_arg_indirect_ref (addr);
5421 return build_va_arg_indirect_ref (addr);
5424 /* Return nonzero if OPNUM's MEM should be matched
5425    in movabs* patterns. */
/* NOTE(review): this excerpt elides several original lines (return type,
   braces, and the local declarations of SET/MEM).  */
5428 ix86_check_movabs (rtx insn, int opnum)
5432   set = PATTERN (insn);
/* A PARALLEL pattern wraps the SET with clobbers; the SET is element 0.  */
5433   if (GET_CODE (set) == PARALLEL)
5434     set = XVECEXP (set, 0, 0);
5435   gcc_assert (GET_CODE (set) == SET);
5436   mem = XEXP (set, opnum);
/* Strip any SUBREG wrappers to reach the underlying MEM.  */
5437   while (GET_CODE (mem) == SUBREG)
5438     mem = SUBREG_REG (mem);
5439   gcc_assert (MEM_P (mem));
/* Volatile memory is only acceptable when volatile_ok is set.  */
5440   return (volatile_ok || !MEM_VOLATILE_P (mem));
5443 /* Initialize the table of extra 80387 mathematical constants. */
5446 init_ext_80387_constants (void)
/* Decimal strings for the five constants the x87 can load directly:
   log10(2), ln(2), log2(e), log2(10) and pi (per the fld* mnemonics
   noted on each entry below).  */
5448   static const char * cst[5] =
5450     "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5451     "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5452     "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5453     "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5454     "3.1415926535897932385128089594061862044", /* 4: fldpi */
5458   for (i = 0; i < 5; i++)
5460       real_from_string (&ext_80387_constants_table[i], cst[i]);
5461       /* Ensure each constant is rounded to XFmode precision. */
5462       real_convert (&ext_80387_constants_table[i],
5463 		    XFmode, &ext_80387_constants_table[i]);
/* Mark the table as initialized so callers can lazily init once.  */
5466   ext_80387_constants_init = 1;
5469 /* Return true if the constant is something that can be loaded with
5470    a special instruction. */
/* NOTE(review): several return statements are elided from this excerpt;
   the visible checks classify X (a CONST_DOUBLE in an x87 float mode)
   against the constants the x87 can materialize without memory.  */
5473 standard_80387_constant_p (rtx x)
5475   enum machine_mode mode = GET_MODE (x);
5479   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
/* 0.0 and 1.0 correspond to the fldz/fld1 instructions.  */
5482   if (x == CONST0_RTX (mode))
5484   if (x == CONST1_RTX (mode))
5487   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5489   /* For XFmode constants, try to find a special 80387 instruction when
5490      optimizing for size or on those CPUs that benefit from them. */
5492       && (optimize_size || TARGET_EXT_80387_CONSTANTS))
/* Lazily build the extended-constant table before searching it.  */
5496       if (! ext_80387_constants_init)
5497 	init_ext_80387_constants ();
5499       for (i = 0; i < 5; i++)
5500 	if (real_identical (&r, &ext_80387_constants_table[i]))
5504   /* Load of the constant -0.0 or -1.0 will be split as
5505      fldz;fchs or fld1;fchs sequence. */
5506   if (real_isnegzero (&r))
5508   if (real_identical (&r, &dconstm1))
5514 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): the body of this switch (one case per value returned by
   standard_80387_constant_p, mapping to fld* mnemonics) is elided from
   this excerpt — lines 5521-5543 are missing.  */
5518 standard_80387_constant_opcode (rtx x)
5520   switch (standard_80387_constant_p (x))
5544 /* Return the CONST_DOUBLE representing the 80387 constant that is
5545    loaded by the specified special instruction.  The argument IDX
5546    matches the return value from standard_80387_constant_p. */
/* NOTE(review): the mapping from IDX to table index I (lines 5555-5569)
   is elided from this excerpt.  */
5549 standard_80387_constant_rtx (int idx)
/* Lazily initialize the table of extended x87 constants.  */
5553   if (! ext_80387_constants_init)
5554     init_ext_80387_constants ();
5570   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5574 /* Return 1 if mode is a valid mode for sse. */
/* NOTE(review): the entire body (lines 5577-5592, presumably a switch
   over the SSE vector modes) is elided from this excerpt.  */
5576 standard_sse_mode_p (enum machine_mode mode)
5593 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Returns 1 for all-zeros (loadable with pxor/xorps/xorpd), 2 for
   all-ones when SSE2 is available (pcmpeqd), -1 for all-ones without
   SSE2.  The fall-through return for other constants is elided here.  */
5596 standard_sse_constant_p (rtx x)
5598   enum machine_mode mode = GET_MODE (x);
5600   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5602   if (vector_all_ones_operand (x, mode)
5603       && standard_sse_mode_p (mode))
5604     return TARGET_SSE2 ? 2 : -1;
5609 /* Return the opcode of the special instruction to be used to load
/* Maps the classification from standard_sse_constant_p to the assembly
   template that materializes it: xor-with-self for zero (picking the
   variant matching INSN's mode attribute), pcmpeqd for all-ones.
   The case labels and gcc_unreachable tail are elided in this excerpt.  */
5613 standard_sse_constant_opcode (rtx insn, rtx x)
5615   switch (standard_sse_constant_p (x))
5618       if (get_attr_mode (insn) == MODE_V4SF)
5619 	return "xorps\t%0, %0";
5620       else if (get_attr_mode (insn) == MODE_V2DF)
5621 	return "xorpd\t%0, %0";
5623 	return "pxor\t%0, %0";
5625       return "pcmpeqd\t%0, %0";
5630 /* Returns 1 if OP contains a symbol reference */
/* Recursively walks the RTX OP using its format string: 'E' entries are
   vectors (scanned element by element), 'e' entries are sub-expressions.
   Returns nonzero as soon as a SYMBOL_REF or LABEL_REF is found.  */
5633 symbolic_reference_mentioned_p (rtx op)
5638   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5641   fmt = GET_RTX_FORMAT (GET_CODE (op));
5642   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5648 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5649 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5653       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5660 /* Return 1 if it is appropriate to emit `ret' instructions in the
5661    body of a function.  Do this only if the epilogue is simple, needing a
5662    couple of insns.  Prior to reloading, we can't tell how many registers
5663    must be saved, so return 0 then.  Return 0 if there is no frame
5664    marker to de-allocate. */
5667 ix86_can_use_return_insn_p (void)
5669   struct ix86_frame frame;
/* Before reload the register-save set is unknown; a frame pointer
   always needs explicit epilogue code.  */
5671   if (! reload_completed || frame_pointer_needed)
5674   /* Don't allow more than 32 pop, since that's all we can do
5675      with one instruction. */
5676   if (current_function_pops_args
5677       && current_function_args_size >= 32768)
/* A bare `ret' suffices only when nothing must be deallocated or
   restored.  */
5680   ix86_compute_frame_layout (&frame);
5681   return frame.to_allocate == 0 && frame.nregs == 0;
5684 /* Value should be nonzero if functions must have frame pointers.
5685    Zero means the frame pointer need not be set up (and parms may
5686    be accessed via the stack pointer) in functions that seem suitable. */
5689 ix86_frame_pointer_required (void)
5691   /* If we accessed previous frames, then the generated code expects
5692      to be able to access the saved ebp value in our frame. */
5693   if (cfun->machine->accesses_prev_frame)
5696   /* Several x86 os'es need a frame pointer for other reasons,
5697      usually pertaining to setjmp. */
5698   if (SUBTARGET_FRAME_POINTER_REQUIRED)
5701   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5702      the frame pointer by default.  Turn it back on now if we've not
5703      got a leaf function. */
5704   if (TARGET_OMIT_LEAF_FRAME_POINTER
5705       && (!current_function_is_leaf
5706 	  || ix86_current_function_calls_tls_descriptor))
/* Profiling also forces a frame pointer (the final return value lines
   are elided in this excerpt).  */
5709   if (current_function_profile)
5715 /* Record that the current function accesses previous call frames. */
/* Setting this flag makes ix86_frame_pointer_required force a frame
   pointer for this function.  */
5718 ix86_setup_frame_addresses (void)
5720   cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE: nonzero when the assembler supports hidden
   COMDAT-style thunks (or on Mach-O), so PC thunks can be emitted once
   per object with linkonce semantics instead of as local labels.  */
5723 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5724 # define USE_HIDDEN_LINKONCE 1
5726 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers 0..7 for which a PC-thunk has been requested;
   set in output_set_got and consumed by ix86_file_end.  */
5729 static int pic_labels_used;
5731 /* Fills in the label name that should be used for a pc thunk for
5732    the given register. */
/* NAME must have room for 32 bytes.  Only meaningful for 32-bit code;
   64-bit targets use RIP-relative addressing instead of PC thunks.  */
5735 get_pc_thunk_name (char name[32], unsigned int regno)
5737   gcc_assert (!TARGET_64BIT);
/* Linkonce thunks get a global, register-specific name so duplicates
   merge at link time; otherwise use an internal label.  */
5739   if (USE_HIDDEN_LINKONCE)
5740     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5742     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5746 /* This function generates code for -fpic that loads %ebx with
5747    the return address of the caller and then returns. */
/* End-of-file hook: emits one PC-thunk per register recorded in
   pic_labels_used.  Each thunk loads the return address from the top of
   stack into the target register and returns.  */
5750 ix86_file_end (void)
5755   for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested.  */
5759       if (! ((pic_labels_used >> regno) & 1))
5762       get_pc_thunk_name (name, regno);
/* Mach-O path: emit the thunk as a weak, private-extern coalesced
   definition (the enclosing #if/#else lines are elided here).  */
5767 	  switch_to_section (darwin_sections[text_coal_section]);
5768 	  fputs ("\t.weak_definition\t", asm_out_file);
5769 	  assemble_name (asm_out_file, name);
5770 	  fputs ("\n\t.private_extern\t", asm_out_file);
5771 	  assemble_name (asm_out_file, name);
5772 	  fputs ("\n", asm_out_file);
5773 	  ASM_OUTPUT_LABEL (asm_out_file, name);
5777       if (USE_HIDDEN_LINKONCE)
/* ELF path: build a one-only FUNCTION_DECL so the thunk lands in its
   own section and duplicate copies merge at link time.  */
5781 	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
5783 	  TREE_PUBLIC (decl) = 1;
5784 	  TREE_STATIC (decl) = 1;
5785 	  DECL_ONE_ONLY (decl) = 1;
5787 	  (*targetm.asm_out.unique_section) (decl, 0);
5788 	  switch_to_section (get_named_section (decl, NULL, 0));
5790 	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
5791 	  fputs ("\t.hidden\t", asm_out_file);
5792 	  assemble_name (asm_out_file, name);
5793 	  fputc ('\n', asm_out_file);
5794 	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: plain local label in the regular text section.  */
5798 	  switch_to_section (text_section);
5799 	  ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: mov (%esp), %reg; ret — loads the caller's return
   address (pushed by the call to the thunk) into REGNO.  */
5802       xops[0] = gen_rtx_REG (SImode, regno);
5803       xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5804       output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5805       output_asm_insn ("ret", xops);
5808   if (NEED_INDICATE_EXEC_STACK)
5809     file_end_indicate_exec_stack ();
5812 /* Emit code for the SET_GOT patterns. */
/* Emits assembly that loads the GOT base address into DEST.  LABEL, if
   given, names the local PC label used in the inline (non-thunk)
   sequence.  Several #if TARGET_MACHO / brace lines are elided from
   this excerpt.  */
5815 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5821   if (TARGET_VXWORKS_RTP && flag_pic)
5823       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
5824       xops[2] = gen_rtx_MEM (Pmode,
5825 			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5826       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5828       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5829 	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5830 	 an unadorned address.  */
5831       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5832       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5833       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5837   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or without PIC) use the classic
   call-next-insn / pop sequence to get the PC.  */
5839   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5841       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5844 	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5846 	output_asm_insn ("call\t%a2", xops);
5849       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5850          is what will be referenced by the Mach-O PIC subsystem.  */
5852 	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5855       (*targetm.asm_out.internal_label) (asm_out_file, "L",
5856 				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* The popped value is the address of the label just emitted.  */
5859 	output_asm_insn ("pop{l}\t%0", xops);
/* Otherwise call the per-register PC thunk; record its use so
   ix86_file_end emits the thunk body.  */
5864       get_pc_thunk_name (name, REGNO (dest));
5865       pic_labels_used |= 1 << REGNO (dest);
5867       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5868       xops[2] = gen_rtx_MEM (QImode, xops[2]);
5869       output_asm_insn ("call\t%X2", xops);
5870       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5871          is what will be referenced by the Mach-O PIC subsystem.  */
5874 	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5876 	targetm.asm_out.internal_label (asm_out_file, "L",
5877 					CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol offset to turn the PC into the GOT base.  */
5884   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5885     output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5887     output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5892 /* Generate an "push" pattern for input ARG. */
/* NOTE(review): the function header and the MEM wrapper around the
   PRE_DEC address (lines 5893-5896 and 5898) are elided from this
   excerpt; the visible RTL builds a SET whose destination is a
   pre-decremented stack-pointer slot.  */
5897   return gen_rtx_SET (VOIDmode,
5899 				  gen_rtx_PRE_DEC (Pmode,
5900 						   stack_pointer_rtx)),
5904 /* Return >= 0 if there is an unused call-clobbered register available
5905    for the entire function.  */
/* Only safe in a leaf function that is not profiled and makes no TLS
   descriptor calls, since call-clobbered registers survive then.
   Scans eax/ecx/edx (regnos 2..0) for one that is never live.  */
5908 ix86_select_alt_pic_regnum (void)
5910   if (current_function_is_leaf && !current_function_profile
5911       && !ix86_current_function_calls_tls_descriptor)
5914       for (i = 2; i >= 0; --i)
5915         if (!df_regs_ever_live_p (i))
/* No suitable register found.  */
5919   return INVALID_REGNUM;
5922 /* Return 1 if we need to save REGNO.  */
5924 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved when it is actually used — unless an
   alternate call-clobbered register can hold the GOT pointer instead.  */
5926   if (pic_offset_table_rtx
5927       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5928       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5929 	  || current_function_profile
5930 	  || current_function_calls_eh_return
5931 	  || current_function_uses_const_pool))
5933       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* On the eh_return path also save the EH data registers (the loop
   header for I is elided from this excerpt).  */
5938   if (current_function_calls_eh_return && maybe_eh_return)
5943 	  unsigned test = EH_RETURN_DATA_REGNO (i);
5944 	  if (test == INVALID_REGNUM)
/* The register holding the forced-aligned argument pointer must
   survive the call as well.  */
5951   if (cfun->machine->force_align_arg_pointer
5952       && regno == REGNO (cfun->machine->force_align_arg_pointer))
/* General case: save call-saved, non-fixed registers that are live,
   except the frame pointer when it is handled separately.  */
5955   return (df_regs_ever_live_p (regno)
5956 	  && !call_used_regs[regno]
5957 	  && !fixed_regs[regno]
5958 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5961 /* Return number of registers to be saved on the stack.  */
/* Counts hard registers for which ix86_save_reg (with the eh_return
   set included) answers true.  */
5964 ix86_nsaved_regs (void)
5969   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5970     if (ix86_save_reg (regno, true))
5975 /* Return the offset between two registers, one to be eliminated, and the other
5976    its replacement, at the start of a routine.  */
/* Implements INITIAL_ELIMINATION_OFFSET: the answers are derived from
   the computed frame layout for the current function.  */
5979 ix86_initial_elimination_offset (int from, int to)
5981   struct ix86_frame frame;
5982   ix86_compute_frame_layout (&frame);
5984   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5985     return frame.hard_frame_pointer_offset;
5986   else if (from == FRAME_POINTER_REGNUM
5987 	   && to == HARD_FRAME_POINTER_REGNUM)
5988     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining eliminations must target the stack pointer.  */
5991       gcc_assert (to == STACK_POINTER_REGNUM);
5993       if (from == ARG_POINTER_REGNUM)
5994 	return frame.stack_pointer_offset;
5996       gcc_assert (from == FRAME_POINTER_REGNUM);
5997       return frame.stack_pointer_offset - frame.frame_pointer_offset;
6001 /* Fill structure ix86_frame about frame of currently computed function.  */
/* Computes every offset and size in *FRAME for the current function:
   saved-register count, varargs save area, alignment padding, outgoing
   argument area, red-zone usage and the final stack-pointer offset.
   Some condition and brace lines are elided from this excerpt.  */
6004 ix86_compute_frame_layout (struct ix86_frame *frame)
6006   HOST_WIDE_INT total_size;
6007   unsigned int stack_alignment_needed;
6008   HOST_WIDE_INT offset;
6009   unsigned int preferred_alignment;
6010   HOST_WIDE_INT size = get_frame_size ();
6012   frame->nregs = ix86_nsaved_regs ();
6015   stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
6016   preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
6018   /* During reload iteration the amount of registers saved can change.
6019      Recompute the value as needed.  Do not recompute when amount of registers
6020      didn't change as reload does multiple calls to the function and does not
6021      expect the decision to change within single iteration.  */
6023       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6025       int count = frame->nregs;
6027       cfun->machine->use_fast_prologue_epilogue_nregs = count;
6028       /* The fast prologue uses move instead of push to save registers.  This
6029          is significantly longer, but also executes faster as modern hardware
6030          can execute the moves in parallel, but can't do that for push/pop.
6032 	 Be careful about choosing what prologue to emit:  When function takes
6033 	 many instructions to execute we may use slow version as well as in
6034 	 case function is known to be outside hot spot (this is known with
6035 	 feedback only).  Weight the size of function by number of registers
6036 	 to save as it is cheap to use one or two push instructions but very
6037 	 slow to use many of them.  */
6039 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6040       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6041 	  || (flag_branch_probabilities
6042 	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6043         cfun->machine->use_fast_prologue_epilogue = false;
6045         cfun->machine->use_fast_prologue_epilogue
6046 	   = !expensive_function_p (count);
6048   if (TARGET_PROLOGUE_USING_MOVE
6049       && cfun->machine->use_fast_prologue_epilogue)
6050     frame->save_regs_using_mov = true;
6052     frame->save_regs_using_mov = false;
6055   /* Skip return address and saved base pointer.  */
6056   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6058   frame->hard_frame_pointer_offset = offset;
6060   /* Do some sanity checking of stack_alignment_needed and
6061      preferred_alignment, since i386 port is the only using those features
6062      that may break easily.  */
6064   gcc_assert (!size || stack_alignment_needed);
6065   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6066   gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6067   gcc_assert (stack_alignment_needed
6068 	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6070   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6071     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6073   /* Register save area */
6074   offset += frame->nregs * UNITS_PER_WORD;
/* Varargs functions on 64-bit reserve a fixed-size save area for the
   register arguments (surrounding condition lines elided here).  */
6077   if (ix86_save_varrargs_registers)
6079       offset += X86_64_VARARGS_SIZE;
6080       frame->va_arg_size = X86_64_VARARGS_SIZE;
6083     frame->va_arg_size = 0;
6085   /* Align start of frame for local function.  */
6086   frame->padding1 = ((offset + stack_alignment_needed - 1)
6087 		     & -stack_alignment_needed) - offset;
6089   offset += frame->padding1;
6091   /* Frame pointer points here.  */
6092   frame->frame_pointer_offset = offset;
6096   /* Add outgoing arguments area.  Can be skipped if we eliminated
6097      all the function calls as dead code.
6098      Skipping is however impossible when function calls alloca.  Alloca
6099      expander assumes that last current_function_outgoing_args_size
6100      of stack frame are unused.  */
6101   if (ACCUMULATE_OUTGOING_ARGS
6102       && (!current_function_is_leaf || current_function_calls_alloca
6103 	  || ix86_current_function_calls_tls_descriptor))
6105       offset += current_function_outgoing_args_size;
6106       frame->outgoing_arguments_size = current_function_outgoing_args_size;
6109     frame->outgoing_arguments_size = 0;
6111   /* Align stack boundary.  Only needed if we're calling another function
6113   if (!current_function_is_leaf || current_function_calls_alloca
6114       || ix86_current_function_calls_tls_descriptor)
6115     frame->padding2 = ((offset + preferred_alignment - 1)
6116 		       & -preferred_alignment) - offset;
6118     frame->padding2 = 0;
6120   offset += frame->padding2;
6122   /* We've reached end of stack frame.  */
6123   frame->stack_pointer_offset = offset;
6125   /* Size prologue needs to allocate.  */
6126   frame->to_allocate =
6127     (size + frame->padding1 + frame->padding2
6128      + frame->outgoing_arguments_size + frame->va_arg_size);
/* A tiny frame or a 64-bit frame too large for a 32-bit displacement
   cannot use the move-based register save.  */
6130   if ((!frame->to_allocate && frame->nregs <= 1)
6131       || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6132     frame->save_regs_using_mov = false;
/* Leaf functions on red-zone targets may use the area below the stack
   pointer instead of allocating, capped at the usable red-zone size.  */
6134   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6135       && current_function_is_leaf
6136       && !ix86_current_function_calls_tls_descriptor)
6138       frame->red_zone_size = frame->to_allocate;
6139       if (frame->save_regs_using_mov)
6140 	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6141       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6142 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6145     frame->red_zone_size = 0;
6146   frame->to_allocate -= frame->red_zone_size;
6147   frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (the surrounding #if 0 or
   condition lines are elided from this excerpt).  */
6149   fprintf (stderr, "\n");
6150   fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6151   fprintf (stderr, "size: %ld\n", (long)size);
6152   fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6153   fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6154   fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6155   fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6156   fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6157   fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6158   fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6159   fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6160 	   (long)frame->hard_frame_pointer_offset);
6161   fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6162   fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6163   fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6164   fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6168 /* Emit code to save registers in the prologue.  */
/* Pushes every register that must be saved, from highest regno down,
   marking each insn frame-related for unwind info.  */
6171 ix86_emit_save_regs (void)
6176   for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6177     if (ix86_save_reg (regno, true))
6179 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6180 	RTX_FRAME_RELATED_P (insn) = 1;
6184 /* Emit code to save registers using MOV insns.  First register
6185    is restored from POINTER + OFFSET.  */
/* Move-based alternative to push: stores each to-be-saved register at
   successive word slots starting at POINTER + OFFSET.  */
6187 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6192   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6193     if (ix86_save_reg (regno, true))
6195 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6197 			       gen_rtx_REG (Pmode, regno));
6198 	RTX_FRAME_RELATED_P (insn) = 1;
6199 	offset += UNITS_PER_WORD;
6203 /* Expand prologue or epilogue stack adjustment.
6204    The pattern exist to put a dependency on all ebp-based memory accesses.
6205    STYLE should be negative if instructions should be marked as frame related,
6206    zero if %r11 register is live and cannot be freely used and positive
/* Emits DEST = SRC + OFFSET.  The 32-bit case and immediates that fit
   a 64-bit pattern go straight through; huge 64-bit offsets are first
   loaded into %r11 (surrounding condition lines elided here).  */
6210 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6215     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6216   else if (x86_64_immediate_operand (offset, DImode))
6217     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6221       /* r11 is used by indirect sibcall return as well, set before the
6222 	 epilogue and used after the epilogue.  ATM indirect sibcall
6223 	 shouldn't be used together with huge frame sizes in one
6224 	 function because of the frame_size check in sibcall.c.  */
6226       r11 = gen_rtx_REG (DImode, R11_REG);
6227       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6229 	RTX_FRAME_RELATED_P (insn) = 1;
6230       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
/* Negative STYLE marks the adjustment frame-related for unwind info.  */
6234     RTX_FRAME_RELATED_P (insn) = 1;
6237 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
/* Returns the RTX for the incoming-argument pointer.  When stack
   realignment is requested (via attribute, -mstackrealign, or a
   file-scope main on targets that force it), a pseudo copy of %ecx is
   used so the prologue can realign %esp.  */
6240 ix86_internal_arg_pointer (void)
6242   bool has_force_align_arg_pointer =
6243     (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6244 			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6245   if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6246        && DECL_NAME (current_function_decl)
6247        && MAIN_NAME_P (DECL_NAME (current_function_decl))
6248        && DECL_FILE_SCOPE_P (current_function_decl))
6249       || ix86_force_align_arg_pointer
6250       || has_force_align_arg_pointer)
6252       /* Nested functions can't realign the stack due to a register
/* (conflict with the static-chain register — remainder of this comment
   is elided from the excerpt).  */
6254       if (DECL_CONTEXT (current_function_decl)
6255 	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6257 	  if (ix86_force_align_arg_pointer)
6258 	    warning (0, "-mstackrealign ignored for nested functions");
6259 	  if (has_force_align_arg_pointer)
6260 	    error ("%s not supported for nested functions",
6261 		   ix86_force_align_arg_pointer_string);
6262 	  return virtual_incoming_args_rtx;
/* Remember the register so prologue/epilogue and ix86_save_reg can
   treat it specially.  */
6264       cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6265       return copy_to_reg (cfun->machine->force_align_arg_pointer);
6268     return virtual_incoming_args_rtx;
6271 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6272    This is called from dwarf2out.c to emit call frame instructions
6273    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
/* PATTERN is a SET whose source is the UNSPEC; the switch dispatches on
   the unspec code (the switch header and default case are elided in
   this excerpt).  */
6275 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6277   rtx unspec = SET_SRC (pattern);
6278   gcc_assert (GET_CODE (unspec) == UNSPEC);
6282     case UNSPEC_REG_SAVE:
6283       dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6284 			      SET_DEST (pattern));
6286     case UNSPEC_DEF_CFA:
6287       dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6288 			 INTVAL (XVECEXP (unspec, 0, 0)));
6295 /* Expand the prologue into a bunch of separate insns.  */
/* Emits, in order: optional stack realignment, frame-pointer setup,
   register saves (push or mov based), stack allocation (direct or via
   the stack-probe worker), and PIC register setup.  Several condition
   and brace lines are elided from this excerpt.  */
6298 ix86_expand_prologue (void)
6302   struct ix86_frame frame;
6303   HOST_WIDE_INT allocate;
6305   ix86_compute_frame_layout (&frame);
/* Stack-realignment path (see ix86_internal_arg_pointer).  */
6307   if (cfun->machine->force_align_arg_pointer)
6311       /* Grab the argument pointer.  */
6312       x = plus_constant (stack_pointer_rtx, 4);
6313       y = cfun->machine->force_align_arg_pointer;
6314       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6315       RTX_FRAME_RELATED_P (insn) = 1;
6317       /* The unwind info consists of two parts: install the fafp as the cfa,
6318 	 and record the fafp as the "save register" of the stack pointer.
6319 	 The later is there in order that the unwinder can see where it
6320 	 should restore the stack pointer across the and insn.  */
6321       x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6322       x = gen_rtx_SET (VOIDmode, y, x);
6323       RTX_FRAME_RELATED_P (x) = 1;
6324       y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6326       y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6327       RTX_FRAME_RELATED_P (y) = 1;
6328       x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6329       x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6330       REG_NOTES (insn) = x;
6332       /* Align the stack.  */
6333       emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6336       /* And here we cheat like madmen with the unwind info.  We force the
6337 	 cfa register back to sp+4, which is exactly what it was at the
6338 	 start of the function.  Re-pushing the return address results in
6339 	 the return at the same spot relative to the cfa, and thus is
6340 	 correct wrt the unwind info.  */
6341       x = cfun->machine->force_align_arg_pointer;
6342       x = gen_frame_mem (Pmode, plus_constant (x, -4));
6343       insn = emit_insn (gen_push (x));
6344       RTX_FRAME_RELATED_P (insn) = 1;
6347       x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6348       x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6349       x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6350       REG_NOTES (insn) = x;
6353   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
6354      slower on all targets.  Also sdb doesn't like it.  */
6356   if (frame_pointer_needed)
6358       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6359       RTX_FRAME_RELATED_P (insn) = 1;
6361       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6362       RTX_FRAME_RELATED_P (insn) = 1;
6365   allocate = frame.to_allocate;
/* Push-based saves happen before allocation; mov-based saves fold the
   register area into the allocation size.  */
6367   if (!frame.save_regs_using_mov)
6368     ix86_emit_save_regs ();
6370     allocate += frame.nregs * UNITS_PER_WORD;
6372   /* When using red zone we may start register saving before allocating
6373      the stack frame saving one cycle of the prologue.  However I will
6374      avoid doing this if I am going to have to probe the stack since
6375      at least on x86_64 the stack probe can turn into a call that clobbers
6376      a red zone location */
6377   if (TARGET_RED_ZONE && frame.save_regs_using_mov
6378       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6379     ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6380 				   : stack_pointer_rtx,
6381 				   -frame.nregs * UNITS_PER_WORD);
/* Small allocations: a single sub; large ones go through the probing
   worker below.  */
6385   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6386     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6387 			       GEN_INT (-allocate), -1);
6390       /* Only valid for Win32.  */
6391       rtx eax = gen_rtx_REG (Pmode, AX_REG);
6395       gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6397       if (TARGET_64BIT_MS_ABI)
6400 	eax_live = ix86_eax_live_at_start_p ();
/* Preserve a live incoming %eax around its use as the size argument.  */
6404 	  emit_insn (gen_push (eax));
6405 	  allocate -= UNITS_PER_WORD;
6408       emit_move_insn (eax, GEN_INT (allocate));
6411 	insn = gen_allocate_stack_worker_64 (eax);
6413 	insn = gen_allocate_stack_worker_32 (eax);
6414       insn = emit_insn (insn);
6415       RTX_FRAME_RELATED_P (insn) = 1;
/* Attach a note describing the net sp adjustment for unwind info.  */
6416       t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6417       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6418       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6419 					    t, REG_NOTES (insn));
/* Reload the saved %eax from its slot in the new frame.  */
6423 	  if (frame_pointer_needed)
6424 	    t = plus_constant (hard_frame_pointer_rtx,
6427 			       - frame.nregs * UNITS_PER_WORD);
6429 	    t = plus_constant (stack_pointer_rtx, allocate);
6430 	  emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Mov-based saves that could not be done early (no red zone) happen
   after allocation.  */
6434   if (frame.save_regs_using_mov
6435       && !(TARGET_RED_ZONE
6436          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6438       if (!frame_pointer_needed || !frame.to_allocate)
6439         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6441         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6442 				       -frame.nregs * UNITS_PER_WORD);
/* Set up the PIC register if this function needs it.  */
6445   pic_reg_used = false;
6446   if (pic_offset_table_rtx
6447       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6448 	  || current_function_profile))
6450       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6452       if (alt_pic_reg_used != INVALID_REGNUM)
6453 	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6455       pic_reg_used = true;
/* 64-bit large-model PIC needs an explicit rip-relative sequence.  */
6462 	  if (ix86_cmodel == CM_LARGE_PIC)
6464 	      rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6465 	      rtx label = gen_label_rtx ();
6467 	      LABEL_PRESERVE_P (label) = 1;
6468 	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6469 	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6470 	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6471 	      insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6472 					    pic_offset_table_rtx, tmp_reg));
6475 	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6478 	insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6481   /* Prevent function calls from being scheduled before the call to mcount.
6482      In the pic_reg_used case, make sure that the got load isn't deleted.  */
6483   if (current_function_profile)
6486 	emit_insn (gen_prologue_use (pic_offset_table_rtx));
6487       emit_insn (gen_blockage ());
6491 /* Emit code to restore saved registers using MOV insns.  First register
6492    is restored from POINTER + OFFSET.  */
/* Inverse of ix86_emit_save_regs_using_mov; MAYBE_EH_RETURN selects
   whether the EH data registers count as saved.  */
6494 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6495 				  int maybe_eh_return)
6498   rtx base_address = gen_rtx_MEM (Pmode, pointer);
6500   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6501     if (ix86_save_reg (regno, maybe_eh_return))
6503 	/* Ensure that adjust_address won't be forced to produce pointer
6504 	   out of range allowed by x86-64 instruction set.  */
6505 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Materialize the large offset in %r11 and rebase the address.  */
6509 	    r11 = gen_rtx_REG (DImode, R11_REG);
6510 	    emit_move_insn (r11, GEN_INT (offset));
6511 	    emit_insn (gen_adddi3 (r11, r11, pointer));
6512 	    base_address = gen_rtx_MEM (Pmode, r11);
6515 	emit_move_insn (gen_rtx_REG (Pmode, regno),
6516 			adjust_address (base_address, Pmode, offset));
6517 	offset += UNITS_PER_WORD;
6521 /* Restore function stack, frame, and registers. */
6524 ix86_expand_epilogue (int style)
6527 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6528 struct ix86_frame frame;
6529 HOST_WIDE_INT offset;
6531 ix86_compute_frame_layout (&frame);
6533 /* Calculate start of saved registers relative to ebp. Special care
6534 must be taken for the normal return case of a function using
6535 eh_return: the eax and edx registers are marked as saved, but not
6536 restored along this path. */
6537 offset = frame.nregs;
6538 if (current_function_calls_eh_return && style != 2)
6540 offset *= -UNITS_PER_WORD;
6542 /* If we're only restoring one register and sp is not valid then
6543 using a move instruction to restore the register since it's
6544 less work than reloading sp and popping the register.
6546 The default code result in stack adjustment using add/lea instruction,
6547 while this code results in LEAVE instruction (or discrete equivalent),
6548 so it is profitable in some other cases as well. Especially when there
6549 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6550 and there is exactly one register to pop. This heuristic may need some
6551 tuning in future. */
6552 if ((!sp_valid && frame.nregs <= 1)
6553 || (TARGET_EPILOGUE_USING_MOVE
6554 && cfun->machine->use_fast_prologue_epilogue
6555 && (frame.nregs > 1 || frame.to_allocate))
6556 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6557 || (frame_pointer_needed && TARGET_USE_LEAVE
6558 && cfun->machine->use_fast_prologue_epilogue
6559 && frame.nregs == 1)
6560 || current_function_calls_eh_return)
6562 /* Restore registers. We can use ebp or esp to address the memory
6563 locations. If both are available, default to ebp, since offsets
6564 are known to be small. Only exception is esp pointing directly to the
6565 end of block of saved registers, where we may simplify addressing
6568 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6569 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6570 frame.to_allocate, style == 2);
6572 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6573 offset, style == 2);
6575 /* eh_return epilogues need %ecx added to the stack pointer. */
6578 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6580 if (frame_pointer_needed)
6582 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6583 tmp = plus_constant (tmp, UNITS_PER_WORD);
6584 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6586 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6587 emit_move_insn (hard_frame_pointer_rtx, tmp);
6589 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6594 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6595 tmp = plus_constant (tmp, (frame.to_allocate
6596 + frame.nregs * UNITS_PER_WORD));
6597 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6600 else if (!frame_pointer_needed)
6601 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6602 GEN_INT (frame.to_allocate
6603 + frame.nregs * UNITS_PER_WORD),
6605 /* If not an i386, mov & pop is faster than "leave". */
6606 else if (TARGET_USE_LEAVE || optimize_size
6607 || !cfun->machine->use_fast_prologue_epilogue)
6608 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6611 pro_epilogue_adjust_stack (stack_pointer_rtx,
6612 hard_frame_pointer_rtx,
6615 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6617 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6622 /* First step is to deallocate the stack frame so that we can
6623 pop the registers. */
6626 gcc_assert (frame_pointer_needed);
6627 pro_epilogue_adjust_stack (stack_pointer_rtx,
6628 hard_frame_pointer_rtx,
6629 GEN_INT (offset), style);
6631 else if (frame.to_allocate)
6632 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6633 GEN_INT (frame.to_allocate), style);
6635 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6636 if (ix86_save_reg (regno, false))
6639 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6641 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6643 if (frame_pointer_needed)
6645 /* Leave results in shorter dependency chains on CPUs that are
6646 able to grok it fast. */
6647 if (TARGET_USE_LEAVE)
6648 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6649 else if (TARGET_64BIT)
6650 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6652 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6656 if (cfun->machine->force_align_arg_pointer)
6658 emit_insn (gen_addsi3 (stack_pointer_rtx,
6659 cfun->machine->force_align_arg_pointer,
6663 /* Sibcall epilogues don't want a return instruction. */
6667 if (current_function_pops_args && current_function_args_size)
6669 rtx popc = GEN_INT (current_function_pops_args);
6671 /* i386 can only pop 64K bytes. If asked to pop more, pop
6672 return address, do explicit add, and jump indirectly to the
6675 if (current_function_pops_args >= 65536)
6677 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6679 /* There is no "pascal" calling convention in any 64bit ABI. */
6680 gcc_assert (!TARGET_64BIT);
6682 emit_insn (gen_popsi1 (ecx));
6683 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6684 emit_jump_insn (gen_return_indirect_internal (ecx));
6687 emit_jump_insn (gen_return_pop_internal (popc));
6690 emit_jump_insn (gen_return_internal ());
6693 /* Reset from the function's potential modifications. */
/* Implements the TARGET_ASM_FUNCTION_EPILOGUE hook.  FILE/SIZE are
   unused except for the Mach-O nop emission below.
   NOTE(review): this excerpt is non-contiguous; lines between the
   numbered statements below are elided from this view.  */
6696 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6697 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* Restore the PIC pseudo's hard register number in case the function
   body temporarily renumbered it (e.g. for reload).  */
6699 if (pic_offset_table_rtx)
6700 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6702 /* Mach-O doesn't support labels at the end of objects, so if
6703 it looks like we might want one, insert a NOP. */
/* Scan backwards over trailing deleted-label notes to find the last
   real insn; if a label would end the object, pad with a nop.  */
6705 rtx insn = get_last_insn ();
6708 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6709 insn = PREV_INSN (insn);
6713 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6714 fputs ("\tnop\n", file);
6720 /* Extract the parts of an RTL expression that is a valid memory address
6721 for an instruction. Return 0 if the structure of the address is
6722 grossly off. Return -1 if the address contains ASHIFT, so it is not
6723 strictly valid, but still used for computing length of lea instruction. */
/* Fills *OUT with base/index/scale/displacement/segment.
   NOTE(review): non-contiguous excerpt; several lines (including the
   PLUS-flattening loop header and some switch cases) are elided.  */
6726 ix86_decompose_address (rtx addr, struct ix86_address *out)
6728 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6729 rtx base_reg, index_reg;
6730 HOST_WIDE_INT scale = 1;
6731 rtx scale_rtx = NULL_RTX;
6733 enum ix86_address_seg seg = SEG_DEFAULT;
/* A bare register (or SUBREG of one) is the simplest case: just a base.  */
6735 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
/* PLUS: flatten the addend tree into an array, then classify each
   operand as base, index*scale, segment unspec, or displacement.  */
6737 else if (GET_CODE (addr) == PLUS)
6747 addends[n++] = XEXP (op, 1);
6750 while (GET_CODE (op) == PLUS);
6755 for (i = n; i >= 0; --i)
6758 switch (GET_CODE (op))
6763 index = XEXP (op, 0);
6764 scale_rtx = XEXP (op, 1);
/* A thread-pointer unspec selects the %fs/%gs segment when direct
   TLS segment references are enabled.  */
6768 if (XINT (op, 1) == UNSPEC_TP
6769 && TARGET_TLS_DIRECT_SEG_REFS
6770 && seg == SEG_DEFAULT)
6771 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6800 else if (GET_CODE (addr) == MULT)
6802 index = XEXP (addr, 0); /* index*scale */
6803 scale_rtx = XEXP (addr, 1);
6805 else if (GET_CODE (addr) == ASHIFT)
6809 /* We're called for lea too, which implements ashift on occasion. */
6810 index = XEXP (addr, 0);
6811 tmp = XEXP (addr, 1);
6812 if (!CONST_INT_P (tmp))
/* Shift count 0..3 maps to scale 1/2/4/8.  */
6814 scale = INTVAL (tmp);
6815 if ((unsigned HOST_WIDE_INT) scale > 3)
6821 disp = addr; /* displacement */
6823 /* Extract the integral value of scale. */
6826 if (!CONST_INT_P (scale_rtx))
6828 scale = INTVAL (scale_rtx);
/* Strip SUBREGs so the hard-register checks below see the real regs.  */
6831 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6832 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6834 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6835 if (base_reg && index_reg && scale == 1
6836 && (index_reg == arg_pointer_rtx
6837 || index_reg == frame_pointer_rtx
6838 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* %esp cannot be an index in the SIB byte; swap base and index.  */
6841 tmp = base, base = index, index = tmp;
6842 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6845 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6846 if ((base_reg == hard_frame_pointer_rtx
6847 || base_reg == frame_pointer_rtx
6848 || base_reg == arg_pointer_rtx) && !disp)
6851 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6852 Avoid this by transforming to [%esi+0]. */
6853 if (TARGET_K6 && !optimize_size
6854 && base_reg && !index_reg && !disp
6856 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6859 /* Special case: encode reg+reg instead of reg*2. */
6860 if (!base && index && scale && scale == 2)
6861 base = index, base_reg = index_reg, scale = 1;
6863 /* Special case: scaling cannot be encoded without base or displacement. */
6864 if (!base && !disp && index && scale != 1)
6876 /* Return cost of the memory address x.
6877 For i386, it is better to use a complex address than let gcc copy
6878 the address into a reg and make a new pseudo. But not if the address
6879 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): non-contiguous excerpt; the cost accumulation and
   return statements between the conditions below are elided.  */
6882 ix86_address_cost (rtx x)
6884 struct ix86_address parts;
6886 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so the hard-vs-pseudo register tests work.  */
6890 if (parts.base && GET_CODE (parts.base) == SUBREG)
6891 parts.base = SUBREG_REG (parts.base);
6892 if (parts.index && GET_CODE (parts.index) == SUBREG)
6893 parts.index = SUBREG_REG (parts.index);
6895 /* Attempt to minimize number of registers in the address. */
/* Penalize addresses whose base/index are (still) pseudo registers —
   each pseudo in the address means another live register.  */
6897 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6899 && (!REG_P (parts.index)
6900 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6904 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6906 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6907 && parts.base != parts.index)
6910 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6911 since it's predecode logic can't detect the length of instructions
6912 and it degenerates to vector decoded. Increase cost of such
6913 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6914 to split such addresses or even refuse such addresses at all.
6916 Following addressing modes are affected:
6921 The first and last case may be avoidable by explicitly coding the zero in
6922 memory address, but I don't have AMD-K6 machine handy to check this
6926 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6927 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6928 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6934 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6935 this is used for to form addresses to local data when -fPIC is in
/* Recognizes the Darwin-specific (sym - "<pic base>") MINUS pattern.
   NOTE(review): the return statements are elided in this excerpt.  */
6939 darwin_local_data_pic (rtx disp)
6941 if (GET_CODE (disp) == MINUS)
6943 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6944 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6945 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6947 const char *sym_name = XSTR (XEXP (disp, 1), 0);
/* "<pic base>" is the magic name Darwin uses for the PIC base label.  */
6948 if (! strcmp (sym_name, "<pic base>"))
6956 /* Determine if a given RTX is a valid constant. We already know this
6957 satisfies CONSTANT_P. */
/* NOTE(review): non-contiguous excerpt; case labels and several
   return statements between the lines below are elided.  */
6960 legitimate_constant_p (rtx x)
6962 switch (GET_CODE (x))
/* CONST: peel off a trailing "+ const_int" before classifying.  */
6967 if (GET_CODE (x) == PLUS)
6969 if (!CONST_INT_P (XEXP (x, 1)))
6974 if (TARGET_MACHO && darwin_local_data_pic (x))
6977 /* Only some unspecs are valid as "constants". */
6978 if (GET_CODE (x) == UNSPEC)
6979 switch (XINT (x, 1))
6984 return TARGET_64BIT;
/* TPOFF-style unspecs are only constant for local-exec TLS symbols.  */
6987 x = XVECEXP (x, 0, 0);
6988 return (GET_CODE (x) == SYMBOL_REF
6989 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6991 x = XVECEXP (x, 0, 0);
6992 return (GET_CODE (x) == SYMBOL_REF
6993 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6998 /* We must have drilled down to a symbol. */
6999 if (GET_CODE (x) == LABEL_REF)
7001 if (GET_CODE (x) != SYMBOL_REF)
7006 /* TLS symbols are never valid. */
7007 if (SYMBOL_REF_TLS_MODEL (x))
7010 /* DLLIMPORT symbols are never valid. */
7011 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7012 && SYMBOL_REF_DLLIMPORT_P (x))
/* TImode constants other than zero need special handling — the elided
   condition here decides their validity.  */
7017 if (GET_MODE (x) == TImode
7018 && x != CONST0_RTX (TImode)
7024 if (x == CONST0_RTX (GET_MODE (x)))
7032 /* Otherwise we handle everything else in the move patterns. */
7036 /* Determine if it's legal to put X into the constant pool. This
7037 is not possible for the address of thread-local symbols, which
7038 is checked above. */
/* Implements TARGET_CANNOT_FORCE_CONST_MEM.  Returns true when X must
   NOT be spilled to the constant pool.  */
7041 ix86_cannot_force_const_mem (rtx x)
7043 /* We can always put integral constants and vectors in memory. */
7044 switch (GET_CODE (x))
/* Anything else is poolable exactly when it is a legitimate constant.  */
7054 return !legitimate_constant_p (x);
7057 /* Determine if a given RTX is a valid constant address. */
/* True when X is both a constant and a legitimate (strict) address.  */
7060 constant_address_p (rtx x)
7062 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7065 /* Nonzero if the constant value X is a legitimate general operand
7066 when generating PIC code. It is given that flag_pic is on and
7067 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): non-contiguous excerpt; some case labels and returns
   between the lines below are elided.  */
7070 legitimate_pic_operand_p (rtx x)
7074 switch (GET_CODE (x))
/* CONST: strip an optional "+ const_int" wrapper first.  */
7077 inner = XEXP (x, 0);
7078 if (GET_CODE (inner) == PLUS
7079 && CONST_INT_P (XEXP (inner, 1)))
7080 inner = XEXP (inner, 0);
7082 /* Only some unspecs are valid as "constants". */
7083 if (GET_CODE (inner) == UNSPEC)
7084 switch (XINT (inner, 1))
7089 return TARGET_64BIT;
/* TPOFF unspecs: only local-exec TLS symbols qualify.  */
7091 x = XVECEXP (inner, 0, 0);
7092 return (GET_CODE (x) == SYMBOL_REF
7093 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* Symbolic operands fall back to the PIC displacement check.  */
7101 return legitimate_pic_address_disp_p (x);
7108 /* Determine if a given CONST RTX is a valid memory displacement
/* ... in PIC mode.  NOTE(review): non-contiguous excerpt; case labels,
   braces, and several return statements are elided below.  */
7112 legitimate_pic_address_disp_p (rtx disp)
7116 /* In 64bit mode we can allow direct addresses of symbols and labels
7117 when they are not dynamic symbols. */
7120 rtx op0 = disp, op1;
7122 switch (GET_CODE (disp))
/* CONST (sym + offset): the offset must be a CONST_INT within the
   +/-16MB range that RIP-relative small-model addressing permits.  */
7128 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7130 op0 = XEXP (XEXP (disp, 0), 0);
7131 op1 = XEXP (XEXP (disp, 0), 1);
7132 if (!CONST_INT_P (op1)
7133 || INTVAL (op1) >= 16*1024*1024
7134 || INTVAL (op1) < -16*1024*1024)
7136 if (GET_CODE (op0) == LABEL_REF)
7138 if (GET_CODE (op0) != SYMBOL_REF)
7143 /* TLS references should always be enclosed in UNSPEC. */
7144 if (SYMBOL_REF_TLS_MODEL (op0))
/* Local, near symbols may be addressed directly outside large-PIC.  */
7146 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7147 && ix86_cmodel != CM_LARGE_PIC)
7155 if (GET_CODE (disp) != CONST)
7157 disp = XEXP (disp, 0);
7161 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7162 of GOT tables. We should not need these anyway. */
7163 if (GET_CODE (disp) != UNSPEC
7164 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7165 && XINT (disp, 1) != UNSPEC_GOTOFF
7166 && XINT (disp, 1) != UNSPEC_PLTOFF)))
7169 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7170 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip "unspec + const_int" then classify the unspec.  */
7176 if (GET_CODE (disp) == PLUS)
7178 if (!CONST_INT_P (XEXP (disp, 1)))
7180 disp = XEXP (disp, 0);
7184 if (TARGET_MACHO && darwin_local_data_pic (disp))
7187 if (GET_CODE (disp) != UNSPEC)
7190 switch (XINT (disp, 1))
7195 /* We need to check for both symbols and labels because VxWorks loads
7196 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7198 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7199 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7201 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7202 While ABI specify also 32bit relocation but we don't produce it in
7203 small PIC model at all. */
7204 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7205 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7207 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7209 case UNSPEC_GOTTPOFF:
7210 case UNSPEC_GOTNTPOFF:
7211 case UNSPEC_INDNTPOFF:
/* Initial-exec TLS GOT entries.  */
7214 disp = XVECEXP (disp, 0, 0);
7215 return (GET_CODE (disp) == SYMBOL_REF
7216 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
/* (Elided case label) local-exec TLS.  */
7218 disp = XVECEXP (disp, 0, 0);
7219 return (GET_CODE (disp) == SYMBOL_REF
7220 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
/* (Elided case label) local-dynamic TLS.  */
7222 disp = XVECEXP (disp, 0, 0);
7223 return (GET_CODE (disp) == SYMBOL_REF
7224 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7230 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7231 memory address for an instruction. The MODE argument is the machine mode
7232 for the MEM expression that wants to use this address.
7234 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7235 convert common non-canonical forms to canonical form so that they will
/* Returns nonzero if ADDR is valid; STRICT selects strict register
   checking (post-reload).  Failure paths set REASON and jump to an
   elided "report_error" label.  NOTE(review): non-contiguous excerpt;
   braces, gotos, labels and returns are elided throughout.  */
7239 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7240 rtx addr, int strict)
7242 struct ix86_address parts;
7243 rtx base, index, disp;
7244 HOST_WIDE_INT scale;
7245 const char *reason = NULL;
7246 rtx reason_rtx = NULL_RTX;
7248 if (ix86_decompose_address (addr, &parts) <= 0)
7250 reason = "decomposition failed";
7255 index = parts.index;
7257 scale = parts.scale;
7259 /* Validate base register.
7261 Don't allow SUBREG's that span more than a word here. It can lead to spill
7262 failures when the base is one word out of a two word structure, which is
7263 represented internally as a DImode int. */
7272 else if (GET_CODE (base) == SUBREG
7273 && REG_P (SUBREG_REG (base))
7274 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7276 reg = SUBREG_REG (base);
7279 reason = "base is not a register";
7283 if (GET_MODE (base) != Pmode)
7285 reason = "base is not in Pmode";
/* Strict mode requires a hard register valid as base; non-strict also
   accepts pseudos.  */
7289 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7290 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7292 reason = "base is not valid";
7297 /* Validate index register.
7299 Don't allow SUBREG's that span more than a word here -- same as above. */
7308 else if (GET_CODE (index) == SUBREG
7309 && REG_P (SUBREG_REG (index))
7310 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7312 reg = SUBREG_REG (index);
7315 reason = "index is not a register";
7319 if (GET_MODE (index) != Pmode)
7321 reason = "index is not in Pmode";
7325 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7326 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7328 reason = "index is not valid";
7333 /* Validate scale factor. */
7336 reason_rtx = GEN_INT (scale);
7339 reason = "scale without index";
/* SIB encoding only supports scale 1, 2, 4, 8.  */
7343 if (scale != 2 && scale != 4 && scale != 8)
7345 reason = "scale is not a valid multiplier";
7350 /* Validate displacement. */
7355 if (GET_CODE (disp) == CONST
7356 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7357 switch (XINT (XEXP (disp, 0), 1))
7359 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7360 used. While ABI specify also 32bit relocations, we don't produce
7361 them at all and use IP relative instead. */
7364 gcc_assert (flag_pic);
7366 goto is_legitimate_pic;
7367 reason = "64bit address unspec";
7370 case UNSPEC_GOTPCREL:
7371 gcc_assert (flag_pic);
7372 goto is_legitimate_pic;
7374 case UNSPEC_GOTTPOFF:
7375 case UNSPEC_GOTNTPOFF:
7376 case UNSPEC_INDNTPOFF:
7382 reason = "invalid address unspec";
/* Symbolic displacements under PIC (or Mach-O indirection) need the
   full PIC-construct validation below.  */
7386 else if (SYMBOLIC_CONST (disp)
7390 && MACHOPIC_INDIRECT
7391 && !machopic_operand_p (disp)
7397 if (TARGET_64BIT && (index || base))
7399 /* foo@dtpoff(%rX) is ok. */
7400 if (GET_CODE (disp) != CONST
7401 || GET_CODE (XEXP (disp, 0)) != PLUS
7402 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7403 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7404 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7405 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7407 reason = "non-constant pic memory reference";
7411 else if (! legitimate_pic_address_disp_p (disp))
7413 reason = "displacement is an invalid pic construct";
7417 /* This code used to verify that a symbolic pic displacement
7418 includes the pic_offset_table_rtx register.
7420 While this is good idea, unfortunately these constructs may
7421 be created by "adds using lea" optimization for incorrect
7430 This code is nonsensical, but results in addressing
7431 GOT table with pic_offset_table_rtx base. We can't
7432 just refuse it easily, since it gets matched by
7433 "addsi3" pattern, that later gets split to lea in the
7434 case output register differs from input. While this
7435 can be handled by separate addsi pattern for this case
7436 that never results in lea, this seems to be easier and
7437 correct fix for crash to disable this test. */
7439 else if (GET_CODE (disp) != LABEL_REF
7440 && !CONST_INT_P (disp)
7441 && (GET_CODE (disp) != CONST
7442 || !legitimate_constant_p (disp))
7443 && (GET_CODE (disp) != SYMBOL_REF
7444 || !legitimate_constant_p (disp)))
7446 reason = "displacement is not constant";
/* 64-bit displacements must fit in a signed 32-bit immediate.  */
7449 else if (TARGET_64BIT
7450 && !x86_64_immediate_operand (disp, VOIDmode))
7452 reason = "displacement is out of range";
7457 /* Everything looks valid. */
7464 /* Return a unique alias set for the GOT. */
/* Lazily creates the set on first call; -1 marks "not yet created".
   NOTE(review): the guard and return are elided in this excerpt.  */
7466 static alias_set_type
7467 ix86_GOT_alias_set (void)
7469 static alias_set_type set = -1;
7471 set = new_alias_set ();
7475 /* Return a legitimate reference for ORIG (an address) using the
7476 register REG. If REG is 0, a new pseudo is generated.
7478 There are two types of references that must be handled:
7480 1. Global data references must load the address from the GOT, via
7481 the PIC reg. An insn is emitted to do this load, and the reg is
7484 2. Static data references, constant pool addresses, and code labels
7485 compute the address as an offset from the GOT, whose base is in
7486 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7487 differentiate them from global data objects. The returned
7488 address is the PIC reg + an unspec constant.
7490 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7491 reg also appears in the address. */
/* NOTE(review): non-contiguous excerpt; braces, else-arms, and the
   final return of new_rtx are elided throughout.  */
7494 legitimize_pic_address (rtx orig, rtx reg)
/* 32-bit Darwin delegates entirely to the generic Mach-O machinery.  */
7501 if (TARGET_MACHO && !TARGET_64BIT)
7504 reg = gen_reg_rtx (Pmode);
7505 /* Use the generic Mach-O PIC machinery. */
7506 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit: already-valid PIC displacements need no rewriting.  */
7510 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7512 else if (TARGET_64BIT
7513 && ix86_cmodel != CM_SMALL_PIC
7514 && gotoff_operand (addr, Pmode))
7517 /* This symbol may be referenced via a displacement from the PIC
7518 base address (@GOTOFF). */
7520 if (reload_in_progress)
7521 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7522 if (GET_CODE (addr) == CONST)
7523 addr = XEXP (addr, 0);
7524 if (GET_CODE (addr) == PLUS)
7526 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7528 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7531 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7532 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7534 tmpreg = gen_reg_rtx (Pmode);
7537 emit_move_insn (tmpreg, new_rtx);
/* Fold the PIC base addition into REG when one was supplied.  */
7541 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7542 tmpreg, 1, OPTAB_DIRECT);
7545 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7547 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7549 /* This symbol may be referenced via a displacement from the PIC
7550 base address (@GOTOFF). */
7552 if (reload_in_progress)
7553 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7554 if (GET_CODE (addr) == CONST)
7555 addr = XEXP (addr, 0);
7556 if (GET_CODE (addr) == PLUS)
7558 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7560 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7563 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7564 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7565 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7569 emit_move_insn (reg, new_rtx);
7573 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7574 /* We can't use @GOTOFF for text labels on VxWorks;
7575 see gotoff_operand. */
7576 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7578 /* Given that we've already handled dllimport variables separately
7579 in legitimize_address, and all other variables should satisfy
7580 legitimate_pic_address_disp_p, we should never arrive here. */
7581 gcc_assert (!TARGET_64BIT_MS_ABI);
/* 64-bit small/medium PIC: RIP-relative @GOTPCREL load.  */
7583 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7585 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7586 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7587 new_rtx = gen_const_mem (Pmode, new_rtx);
7588 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7591 reg = gen_reg_rtx (Pmode);
7592 /* Use directly gen_movsi, otherwise the address is loaded
7593 into register for CSE. We don't want to CSE this addresses,
7594 instead we CSE addresses from the GOT table, so skip this. */
7595 emit_insn (gen_movsi (reg, new_rtx));
7600 /* This symbol must be referenced via a load from the
7601 Global Offset Table (@GOT). */
7603 if (reload_in_progress)
7604 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7605 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7606 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7608 new_rtx = force_reg (Pmode, new_rtx);
7609 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7610 new_rtx = gen_const_mem (Pmode, new_rtx);
7611 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7614 reg = gen_reg_rtx (Pmode);
7615 emit_move_insn (reg, new_rtx);
/* Non-symbolic leftovers: wide integers and CONST arithmetic.  */
7621 if (CONST_INT_P (addr)
7622 && !x86_64_immediate_operand (addr, VOIDmode))
7626 emit_move_insn (reg, addr);
7630 new_rtx = force_reg (Pmode, addr);
7632 else if (GET_CODE (addr) == CONST)
7634 addr = XEXP (addr, 0);
7636 /* We must match stuff we generate before. Assume the only
7637 unspecs that can get here are ours. Not that we could do
7638 anything with them anyway.... */
7639 if (GET_CODE (addr) == UNSPEC
7640 || (GET_CODE (addr) == PLUS
7641 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7643 gcc_assert (GET_CODE (addr) == PLUS);
7645 if (GET_CODE (addr) == PLUS)
7647 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7649 /* Check first to see if this is a constant offset from a @GOTOFF
7650 symbol reference. */
7651 if (gotoff_operand (op0, Pmode)
7652 && CONST_INT_P (op1))
7656 if (reload_in_progress)
7657 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7658 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7660 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7661 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7662 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7666 emit_move_insn (reg, new_rtx);
/* 64-bit offsets outside +/-16MB cannot ride on the relocation;
   materialize them in registers instead.  */
7672 if (INTVAL (op1) < -16*1024*1024
7673 || INTVAL (op1) >= 16*1024*1024)
7675 if (!x86_64_immediate_operand (op1, Pmode))
7676 op1 = force_reg (Pmode, op1);
7677 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine.  */
7683 base = legitimize_pic_address (XEXP (addr, 0), reg);
7684 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7685 base == reg ? NULL_RTX : reg);
7687 if (CONST_INT_P (new_rtx))
7688 new_rtx = plus_constant (base, INTVAL (new_rtx));
7691 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7693 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7694 new_rtx = XEXP (new_rtx, 1);
7696 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7704 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Returns the UNSPEC_TP rtx directly, or a fresh pseudo holding it.
   NOTE(review): the TO_REG early-return and final return are elided.  */
7707 get_thread_pointer (int to_reg)
7711 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7715 reg = gen_reg_rtx (Pmode);
7716 insn = gen_rtx_SET (VOIDmode, reg, tp);
7717 insn = emit_insn (insn);
7722 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7723 false if we expect this to be used for a memory address and true if
7724 we expect to load the address into a register. */
/* Rewrites X (a TLS symbol) per MODEL: global-dynamic, local-dynamic,
   initial-exec, or local-exec.  NOTE(review): non-contiguous excerpt;
   braces, some returns, and the final gcc_unreachable are elided.  */
7727 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7729 rtx dest, base, off, pic, tp;
7734 case TLS_MODEL_GLOBAL_DYNAMIC:
7735 dest = gen_reg_rtx (Pmode);
7736 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
/* 64-bit classic GD: a __tls_get_addr call returning in %rax, wrapped
   in a libcall block so it can be CSEd.  */
7738 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7740 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7743 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7744 insns = get_insns ();
7747 CONST_OR_PURE_CALL_P (insns) = 1;
7748 emit_libcall_block (insns, dest, rax, x);
7750 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7751 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7753 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 (TLSDESC): result is an offset; add the thread pointer.  */
7755 if (TARGET_GNU2_TLS)
7757 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7759 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7763 case TLS_MODEL_LOCAL_DYNAMIC:
7764 base = gen_reg_rtx (Pmode);
7765 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7767 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7769 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7772 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7773 insns = get_insns ();
7776 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7777 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7778 CONST_OR_PURE_CALL_P (insns) = 1;
7779 emit_libcall_block (insns, base, rax, note);
7781 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7782 emit_insn (gen_tls_local_dynamic_base_64 (base));
7784 emit_insn (gen_tls_local_dynamic_base_32 (base));
7786 if (TARGET_GNU2_TLS)
7788 rtx x = ix86_tls_module_base ();
7790 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7791 gen_rtx_MINUS (Pmode, x, tp));
/* LD: symbol address = module base + @DTPOFF offset.  */
7794 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7795 off = gen_rtx_CONST (Pmode, off);
7797 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7799 if (TARGET_GNU2_TLS)
7801 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7803 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7808 case TLS_MODEL_INITIAL_EXEC:
/* Pick the GOT-relative unspec flavor based on target/PIC state.  */
7812 type = UNSPEC_GOTNTPOFF;
7816 if (reload_in_progress)
7817 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7818 pic = pic_offset_table_rtx;
7819 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7821 else if (!TARGET_ANY_GNU_TLS)
7823 pic = gen_reg_rtx (Pmode);
7824 emit_insn (gen_set_got (pic));
7825 type = UNSPEC_GOTTPOFF;
7830 type = UNSPEC_INDNTPOFF;
/* Load the thread-pointer offset from the GOT.  */
7833 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7834 off = gen_rtx_CONST (Pmode, off);
7836 off = gen_rtx_PLUS (Pmode, pic, off);
7837 off = gen_const_mem (Pmode, off);
7838 set_mem_alias_set (off, ix86_GOT_alias_set ());
7840 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7842 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7843 off = force_reg (Pmode, off);
7844 return gen_rtx_PLUS (Pmode, base, off);
/* Sun-style TLS: offset is subtracted from the thread pointer.  */
7848 base = get_thread_pointer (true);
7849 dest = gen_reg_rtx (Pmode);
7850 emit_insn (gen_subsi3 (dest, base, off));
7854 case TLS_MODEL_LOCAL_EXEC:
7855 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7856 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7857 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7858 off = gen_rtx_CONST (Pmode, off);
7860 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7862 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7863 return gen_rtx_PLUS (Pmode, base, off);
7867 base = get_thread_pointer (true);
7868 dest = gen_reg_rtx (Pmode);
7869 emit_insn (gen_subsi3 (dest, base, off));
7880 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Lazily-built hash table mapping decls to their __imp_ VAR_DECLs;
   GC-managed so entries are collected with the decls.  */
7883 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7884 htab_t dllimport_map;
/* NOTE(review): non-contiguous excerpt; the cache-hit early return and
   final return are elided.  */
7887 get_dllimport_decl (tree decl)
7889 struct tree_map *h, in;
7893 size_t namelen, prefixlen;
7899 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7901 in.hash = htab_hash_pointer (decl);
7902 in.base.from = decl;
7903 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7904 h = (struct tree_map *) *loc;
/* Cache miss: build a synthetic read-only external VAR_DECL that
   represents the __imp_ pointer slot.  */
7908 *loc = h = GGC_NEW (struct tree_map);
7910 h->base.from = decl;
7911 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7912 DECL_ARTIFICIAL (to) = 1;
7913 DECL_IGNORED_P (to) = 1;
7914 DECL_EXTERNAL (to) = 1;
7915 TREE_READONLY (to) = 1;
/* Fastcall symbols already carry a prefix character, so they get
   "__imp_" rather than "__imp__".  */
7917 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7918 name = targetm.strip_name_encoding (name);
7919 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
7920 namelen = strlen (name);
7921 prefixlen = strlen (prefix);
7922 imp_name = (char *) alloca (namelen + prefixlen + 1);
7923 memcpy (imp_name, prefix, prefixlen);
7924 memcpy (imp_name + prefixlen, name, namelen + 1);
7926 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7927 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7928 SET_SYMBOL_REF_DECL (rtl, to);
7929 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
/* The decl's RTL is a load through the import slot; GOT alias set
   keeps it disjoint from user memory.  */
7931 rtl = gen_const_mem (Pmode, rtl);
7932 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7934 SET_DECL_RTL (to, rtl);
7935 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
7940 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7941 true if we require the result be a register. */
/* NOTE(review): the want_reg test and final return are elided in this
   excerpt; DECL_RTL of the import decl is the __imp_ memory load.  */
7944 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7949 gcc_assert (SYMBOL_REF_DECL (symbol));
7950 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7952 x = DECL_RTL (imp_decl);
7954 x = force_reg (Pmode, x);
7958 /* Try machine-dependent ways of modifying an illegitimate address
7959 to be legitimate. If we find one, return the new, valid address.
7960 This macro is used in only one place: `memory_address' in explow.c.
7962 OLDX is the address as it was before break_out_memory_refs was called.
7963 In some cases it is useful to look at this to decide what needs to be done.
7965 MODE and WIN are passed so that this macro can use
7966 GO_IF_LEGITIMATE_ADDRESS.
7968 It is always safe for this macro to do nothing. It exists to recognize
7969 opportunities to optimize the output.
7971 For the 80386, we handle X+REG by loading X into a register R and
7972 using R+REG. R will go in a general reg and indexing will be used.
7973 However, if REG is a broken-out memory address or multiplication,
7974 nothing needs to be done because REG can certainly go in a general reg.
7976 When -fpic is used, special handling is needed for symbolic references.
7977 See comments by legitimize_pic_address in i386.c for details. */
/* legitimize_address: try machine-dependent ways to rewrite address X
   into a form GO_IF_LEGITIMATE_ADDRESS accepts for MODE.  Visible here:
   TLS-symbol handling, dllimport-symbol handling, PIC symbolic
   constants, and canonicalization of shift/mult/plus address shapes.
   NOTE(review): this extract is missing interior lines (the embedded
   original line numbers jump), so braces/declarations are absent —
   code kept byte-identical; do not edit from this view alone.  */
7980 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS model of a bare SYMBOL_REF; nonzero means TLS legitimization.  */
7985 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7987 return legitimize_tls_address (x, (enum tls_model) log, false);
/* (const (plus (symbol_ref tls) (const_int))) — legitimize the symbol
   and re-add the offset.  */
7988 if (GET_CODE (x) == CONST
7989 && GET_CODE (XEXP (x, 0)) == PLUS
7990 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7991 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7993 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7994 (enum tls_model) log, false);
7995 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Same two shapes for dllimport symbols on targets that use them.  */
7998 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8000 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8001 return legitimize_dllimport_symbol (x, true);
8002 if (GET_CODE (x) == CONST
8003 && GET_CODE (XEXP (x, 0)) == PLUS
8004 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8005 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8007 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8008 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8012 if (flag_pic && SYMBOLIC_CONST (x))
8013 return legitimize_pic_address (x, 0);
8015 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8016 if (GET_CODE (x) == ASHIFT
8017 && CONST_INT_P (XEXP (x, 1))
8018 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8021 log = INTVAL (XEXP (x, 1));
8022 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8023 GEN_INT (1 << log))
8026 if (GET_CODE (x) == PLUS)
8028 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8030 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8031 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8032 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8035 log = INTVAL (XEXP (XEXP (x, 0), 1));
8036 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8037 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8038 GEN_INT (1 << log));
8041 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8042 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8043 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8046 log = INTVAL (XEXP (XEXP (x, 1), 1));
8047 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8048 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8049 GEN_INT (1 << log));
8052 /* Put multiply first if it isn't already. */
8053 if (GET_CODE (XEXP (x, 1)) == MULT)
8055 rtx tmp = XEXP (x, 0);
8056 XEXP (x, 0) = XEXP (x, 1);
8061 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8062 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8063 created by virtual register instantiation, register elimination, and
8064 similar optimizations. */
8065 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8068 x = gen_rtx_PLUS (Pmode,
8069 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8070 XEXP (XEXP (x, 1), 0)),
8071 XEXP (XEXP (x, 1), 1));
8075 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8076 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8077 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8078 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8079 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8080 && CONSTANT_P (XEXP (x, 1)))
8083 rtx other = NULL_RTX;
/* Exactly one of the two addends is a CONST_INT; fold it via
   plus_constant and keep the other as "other".  */
8085 if (CONST_INT_P (XEXP (x, 1)))
8087 constant = XEXP (x, 1);
8088 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8090 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8092 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8093 other = XEXP (x, 1);
8101 x = gen_rtx_PLUS (Pmode,
8102 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8103 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8104 plus_constant (other, INTVAL (constant)));
8108 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force any remaining MULT sub-terms into registers.  */
8111 if (GET_CODE (XEXP (x, 0)) == MULT)
8114 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8117 if (GET_CODE (XEXP (x, 1)) == MULT)
8120 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8124 && REG_P (XEXP (x, 1))
8125 && REG_P (XEXP (x, 0)))
8128 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8131 x = legitimize_pic_address (x, 0);
8134 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side into a fresh register.  */
8137 if (REG_P (XEXP (x, 0)))
8139 rtx temp = gen_reg_rtx (Pmode);
8140 rtx val = force_operand (XEXP (x, 1), temp);
8142 emit_move_insn (temp, val);
8148 else if (REG_P (XEXP (x, 1)))
8150 rtx temp = gen_reg_rtx (Pmode);
8151 rtx val = force_operand (XEXP (x, 0), temp);
8153 emit_move_insn (temp, val);
8163 /* Print an integer constant expression in assembler syntax. Addition
8164 and subtraction are the only arithmetic that may appear in these
8165 expressions. FILE is the stdio stream to write to, X is the rtx, and
8166 CODE is the operand print code from the output string. */
/* NOTE(review): interior lines (including most `case` labels of both
   switches) are missing from this extract; code kept byte-identical.  */
8169 output_pic_addr_const (FILE *file, rtx x, int code)
8173 switch (GET_CODE (x))
8176 gcc_assert (flag_pic);
/* SYMBOL_REF handling; Darwin (TARGET_MACHO) may need a stub name.  */
8181 if (! TARGET_MACHO || TARGET_64BIT)
8182 output_addr_const (file, x);
8185 const char *name = XSTR (x, 0);
8187 /* Mark the decl as referenced so that cgraph will
8188 output the function. */
8189 if (SYMBOL_REF_DECL (x))
8190 mark_decl_referenced (SYMBOL_REF_DECL (x));
8193 if (MACHOPIC_INDIRECT
8194 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8195 name = machopic_indirection_name (x, /*stub_p=*/true);
8197 assemble_name (file, name);
8199 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8200 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8201 fputs ("@PLT", file);
/* Internal label (presumably the LABEL_REF/CODE_LABEL arm).  */
8208 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8209 assemble_name (asm_out_file, buf);
8213 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8217 /* This used to output parentheses around the expression,
8218 but that does not work on the 386 (either ATT or BSD assembler). */
8219 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE used as a wide integer (VOIDmode).  */
8223 if (GET_MODE (x) == VOIDmode)
8225 /* We can use %d if the number is <32 bits and positive. */
8226 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8227 fprintf (file, "0x%lx%08lx",
8228 (unsigned long) CONST_DOUBLE_HIGH (x),
8229 (unsigned long) CONST_DOUBLE_LOW (x));
8231 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8234 /* We can't handle floating point constants;
8235 PRINT_OPERAND must handle them. */
8236 output_operand_lossage ("floating constant misused");
8240 /* Some assemblers need integer constants to appear first. */
8241 if (CONST_INT_P (XEXP (x, 0)))
8243 output_pic_addr_const (file, XEXP (x, 0), code);
8245 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: constant operand printed first, then the symbolic part.  */
8249 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8250 output_pic_addr_const (file, XEXP (x, 1), code);
8252 output_pic_addr_const (file, XEXP (x, 0), code);
8258 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8259 output_pic_addr_const (file, XEXP (x, 0), code);
8261 output_pic_addr_const (file, XEXP (x, 1), code);
8263 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: emit the operand followed by the relocation suffix chosen
   by the unspec number.  */
8267 gcc_assert (XVECLEN (x, 0) == 1);
8268 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8269 switch (XINT (x, 1))
8272 fputs ("@GOT", file);
8275 fputs ("@GOTOFF", file);
8278 fputs ("@PLTOFF", file);
8280 case UNSPEC_GOTPCREL:
8281 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8282 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8284 case UNSPEC_GOTTPOFF:
8285 /* FIXME: This might be @TPOFF in Sun ld too. */
8286 fputs ("@GOTTPOFF", file);
8289 fputs ("@TPOFF", file);
8293 fputs ("@TPOFF", file);
8295 fputs ("@NTPOFF", file);
8298 fputs ("@DTPOFF", file);
8300 case UNSPEC_GOTNTPOFF:
8302 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8303 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8305 fputs ("@GOTNTPOFF", file);
8307 case UNSPEC_INDNTPOFF:
8308 fputs ("@INDNTPOFF", file);
8311 output_operand_lossage ("invalid UNSPEC as operand");
8317 output_operand_lossage ("invalid expression as operand");
8321 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8322 We need to emit DTP-relative relocations. */
/* Emits `.long <x>@DTPOFF' (plus a trailing ", 0" — presumably the
   upper half when SIZE is 8; lines are missing here, so confirm).  */
8324 static void ATTRIBUTE_UNUSED
8325 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8327 fputs (ASM_LONG, file);
8328 output_addr_const (file, x);
8329 fputs ("@DTPOFF", file);
8335 fputs (", 0", file);
8342 /* In the name of slightly smaller debug output, and to cater to
8343 general assembler lossage, recognize PIC+GOTOFF and turn it back
8344 into a direct symbol reference.
8346 On Darwin, this is necessary to avoid a crash, because Darwin
8347 has a different PIC label for each routine but the DWARF debugging
8348 information is not associated with any particular routine, so it's
8349 necessary to remove references to the PIC label from RTL stored by
8350 the DWARF output code. */
/* NOTE(review): extract is missing interior lines (e.g. the initial
   assignment of `x' from ORIG_X); code kept byte-identical.  */
8353 ix86_delegitimize_address (rtx orig_x)
8356 /* reg_addend is NULL or a multiple of some register. */
8357 rtx reg_addend = NULL_RTX;
8358 /* const_addend is NULL or a const_int. */
8359 rtx const_addend = NULL_RTX;
8360 /* This is the result, or NULL. */
8361 rtx result = NULL_RTX;
/* 64-bit path: unwrap (const (unspec [...] UNSPEC_GOTPCREL)).  */
8368 if (GET_CODE (x) != CONST
8369 || GET_CODE (XEXP (x, 0)) != UNSPEC
8370 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8373 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit path: expect pic_reg [+ index] + (const ...).  */
8376 if (GET_CODE (x) != PLUS
8377 || GET_CODE (XEXP (x, 1)) != CONST)
8380 if (REG_P (XEXP (x, 0))
8381 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8382 /* %ebx + GOT/GOTOFF */
8384 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8386 /* %ebx + %reg * scale + GOT/GOTOFF */
8387 reg_addend = XEXP (x, 0)
8388 if (REG_P (XEXP (reg_addend, 0))
8389 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8390 reg_addend = XEXP (reg_addend, 1);
8391 else if (REG_P (XEXP (reg_addend, 1))
8392 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8393 reg_addend = XEXP (reg_addend, 0);
8396 if (!REG_P (reg_addend)
8397 && GET_CODE (reg_addend) != MULT
8398 && GET_CODE (reg_addend) != ASHIFT)
/* Strip optional const_int addend inside the CONST wrapper.  */
8404 x = XEXP (XEXP (x, 1), 0);
8405 if (GET_CODE (x) == PLUS
8406 && CONST_INT_P (XEXP (x, 1)))
8408 const_addend = XEXP (x, 1);
8412 if (GET_CODE (x) == UNSPEC
8413 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8414 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8415 result = XVECEXP (x, 0, 0);
8417 if (TARGET_MACHO && darwin_local_data_pic (x)
8419 result = XEXP (x, 0);
/* Re-attach any stripped addends to the recovered symbol.  */
8425 result = gen_rtx_PLUS (Pmode, result, const_addend);
8427 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8431 /* If X is a machine specific address (i.e. a symbol or label being
8432 referenced as a displacement from the GOT implemented using an
8433 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): interior lines missing; kept byte-identical.  */
8436 ix86_find_base_term (rtx x)
/* 64-bit: unwrap CONST/PLUS/UNSPEC_GOTPCREL to the symbol inside.  */
8442 if (GET_CODE (x) != CONST)
8445 if (GET_CODE (term) == PLUS
8446 && (CONST_INT_P (XEXP (term, 1))
8447 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8448 term = XEXP (term, 0);
8449 if (GET_CODE (term) != UNSPEC
8450 || XINT (term, 1) != UNSPEC_GOTPCREL)
8453 term = XVECEXP (term, 0, 0);
8455 if (GET_CODE (term) != SYMBOL_REF
8456 && GET_CODE (term) != LABEL_REF)
/* Otherwise fall back to delegitimizing the address.  */
8462 term = ix86_delegitimize_address (x);
8464 if (GET_CODE (term) != SYMBOL_REF
8465 && GET_CODE (term) != LABEL_REF)
/* put_condition_code: write the condition-code suffix (e.g. "a", "ae",
   "p") for CODE under flags mode MODE to FILE; REVERSE inverts the
   condition first.  NOTE(review): most case labels of the big switch
   are missing from this extract; kept byte-identical.  */
8472 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compares are first mapped onto integer condition codes.  */
8477 if (mode == CCFPmode || mode == CCFPUmode)
8479 enum rtx_code second_code, bypass_code;
8480 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8481 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8482 code = ix86_fp_compare_code_to_integer (code);
8486 code = reverse_condition (code);
8537 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8541 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8542 Those same assemblers have the same but opposite lossage on cmov. */
8544 suffix = fp ? "nbe" : "a";
8545 else if (mode == CCCmode)
8568 gcc_assert (mode == CCmode || mode == CCCmode);
8590 gcc_assert (mode == CCmode || mode == CCCmode);
8591 suffix = fp ? "nb" : "ae";
8594 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8601 else if (mode == CCCmode)
8602 suffix = fp ? "nb" : "ae";
8607 suffix = fp ? "u" : "p";
8610 suffix = fp ? "nu" : "np";
8615 fputs (suffix, file);
8618 /* Print the name of register X to FILE based on its machine mode and number.
8619 If CODE is 'w', pretend the mode is HImode.
8620 If CODE is 'b', pretend the mode is QImode.
8621 If CODE is 'k', pretend the mode is SImode.
8622 If CODE is 'q', pretend the mode is DImode.
8623 If CODE is 'h', pretend the reg is the 'high' byte register.
8624 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
/* NOTE(review): interior lines missing (several case labels and putc
   calls); kept byte-identical.  */
8627 print_reg (rtx x, int code, FILE *file)
8629 gcc_assert (x == pc_rtx
8630 || (REGNO (x) != ARG_POINTER_REGNUM
8631 && REGNO (x) != FRAME_POINTER_REGNUM
8632 && REGNO (x) != FLAGS_REG
8633 && REGNO (x) != FPSR_REG
8634 && REGNO (x) != FPCR_REG));
8636 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip" and is 64-bit only.  */
8641 gcc_assert (TARGET_64BIT);
8642 fputs ("rip", file);
/* Translate the override letter into an operand size in bytes.  */
8646 if (code == 'w' || MMX_REG_P (x))
8648 else if (code == 'b')
8650 else if (code == 'k')
8652 else if (code == 'q')
8654 else if (code == 'y')
8656 else if (code == 'h')
8659 code = GET_MODE_SIZE (GET_MODE (x));
8661 /* Irritatingly, AMD extended registers use different naming convention
8662 from the normal registers. */
8663 if (REX_INT_REG_P (x))
8665 gcc_assert (TARGET_64BIT);
8669 error ("extended registers have no high halves");
8672 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8675 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8678 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8681 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8684 error ("unsupported operand size for extended register");
8692 if (STACK_TOP_P (x))
8694 fputs ("st(0)", file);
/* 32/64-bit integer regs get an 'e'/'r' prefix unless FP regs.  */
8701 if (! ANY_FP_REG_P (x))
8702 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8707 fputs (hi_reg_name[REGNO (x)], file);
8710 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8712 fputs (qi_reg_name[REGNO (x)], file);
8715 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8717 fputs (qi_high_reg_name[REGNO (x)], file);
8724 /* Locate some local-dynamic symbol still in use by this function
8725 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: record the first local-dynamic TLS symbol
   found into cfun->machine->some_ld_name.  */
8729 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8733 if (GET_CODE (x) == SYMBOL_REF
8734 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8736 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return a cached local-dynamic TLS symbol name for this function,
   scanning the insn stream on first use.  */
8744 get_some_local_dynamic_name (void)
8748 if (cfun->machine->some_ld_name)
8749 return cfun->machine->some_ld_name;
8751 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8753 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8754 return cfun->machine->some_ld_name;
/* Documentation of the operand print codes handled below (original
   comment, start of which is outside this extract):  */
8760 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8761 C -- print opcode suffix for set/cmov insn.
8762 c -- like C, but print reversed condition
8763 F,f -- likewise, but for floating-point.
8764 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8766 R -- print the prefix for register names.
8767 z -- print the opcode suffix for the size of the current operand.
8768 * -- print a star (in certain assembler syntax)
8769 A -- print an absolute memory reference.
8770 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8771 s -- print a shift double count, followed by the assemblers argument
8773 b -- print the QImode name of the register for the indicated operand.
8774 %b0 would print %al if operands[0] is reg 0.
8775 w -- likewise, print the HImode name of the register.
8776 k -- likewise, print the SImode name of the register.
8777 q -- likewise, print the DImode name of the register.
8778 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8779 y -- print "st(0)" instead of "st" as a register.
8780 D -- print condition for SSE cmp instruction.
8781 P -- if PIC, print an @PLT suffix.
8782 X -- don't print any sort of PIC '@' suffix for a symbol.
8783 & -- print some in-use local-dynamic symbol name.
8784 H -- print a memory address offset by 8; used for sse high-parts
8785 Y -- print condition for SSE5 com* instruction.
8786 + -- print a branch hint as 'cs' or 'ds' prefix
8787 ; -- print a semicolon (after prefixes due to bug in older gas).
/* NOTE(review): this extract is missing many lines, including the case
   labels of the dispatch switch; code kept byte-identical.  */
8791 print_operand (FILE *file, rtx x, int code)
8798 if (ASSEMBLER_DIALECT == ASM_ATT)
8803 assemble_name (file, get_some_local_dynamic_name ());
8807 switch (ASSEMBLER_DIALECT)
8814 /* Intel syntax. For absolute addresses, registers should not
8815 be surrounded by braces. */
8819 PRINT_OPERAND (file, x, 0);
8829 PRINT_OPERAND (file, x, 0);
/* Single-letter size suffixes, emitted only for AT&T syntax.  */
8834 if (ASSEMBLER_DIALECT == ASM_ATT)
8839 if (ASSEMBLER_DIALECT == ASM_ATT)
8844 if (ASSEMBLER_DIALECT == ASM_ATT)
8849 if (ASSEMBLER_DIALECT == ASM_ATT)
8854 if (ASSEMBLER_DIALECT == ASM_ATT)
8859 if (ASSEMBLER_DIALECT == ASM_ATT)
8864 /* 387 opcodes don't get size suffixes if the operands are
8866 if (STACK_REG_P (x))
8869 /* Likewise if using Intel opcodes. */
8870 if (ASSEMBLER_DIALECT == ASM_INTEL)
8873 /* This is the size of op from size of operand. */
8874 switch (GET_MODE_SIZE (GET_MODE (x)))
8883 #ifdef HAVE_GAS_FILDS_FISTS
8893 if (GET_MODE (x) == SFmode)
8908 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8910 #ifdef GAS_MNEMONICS
8936 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8938 PRINT_OPERAND (file, x, 0);
8944 /* Little bit of braindamage here. The SSE compare instructions
8945 does use completely different names for the comparisons that the
8946 fp conditional moves. */
8947 switch (GET_CODE (x))
8962 fputs ("unord", file);
8966 fputs ("neq", file);
8970 fputs ("nlt", file);
8974 fputs ("nle", file);
8977 fputs ("ord", file);
8984 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8985 if (ASSEMBLER_DIALECT == ASM_ATT)
8987 switch (GET_MODE (x))
8989 case HImode: putc ('w', file); break;
8991 case SFmode: putc ('l', file); break;
8993 case DFmode: putc ('q', file); break;
8994 default: gcc_unreachable ();
9001 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9004 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9005 if (ASSEMBLER_DIALECT == ASM_ATT)
9008 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9011 /* Like above, but reverse condition */
9013 /* Check to see if argument to %c is really a constant
9014 and not a condition code which needs to be reversed. */
9015 if (!COMPARISON_P (x))
9017 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9020 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9023 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9024 if (ASSEMBLER_DIALECT == ASM_ATT)
9027 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9031 /* It doesn't actually matter what mode we use here, as we're
9032 only going to use this for printing. */
9033 x = adjust_address_nv (x, DImode, 8);
/* '+' branch hint: only with branch-prediction hints enabled.  */
9040 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9043 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9046 int pred_val = INTVAL (XEXP (x, 0));
9048 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9049 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9051 int taken = pred_val > REG_BR_PROB_BASE / 2;
9052 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9054 /* Emit hints only in the case default branch prediction
9055 heuristics would fail. */
9056 if (taken != cputaken)
9058 /* We use 3e (DS) prefix for taken branches and
9059 2e (CS) prefix for not taken branches. */
9061 fputs ("ds ; ", file);
9063 fputs ("cs ; ", file);
/* 'Y': SSE5 com* comparison names.  */
9071 switch (GET_CODE (x))
9074 fputs ("neq", file);
9081 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9085 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9096 fputs ("unord", file);
9099 fputs ("ord", file);
9102 fputs ("ueq", file);
9105 fputs ("nlt", file);
9108 fputs ("nle", file);
9111 fputs ("ule", file);
9114 fputs ("ult", file);
9117 fputs ("une", file);
9126 fputs (" ; ", file);
9133 output_operand_lossage ("invalid operand code '%c'", code);
/* No (or unknown) code letter: print the operand itself.  */
9138 print_reg (x, code, file);
9142 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9143 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9144 && GET_MODE (x) != BLKmode)
9147 switch (GET_MODE_SIZE (GET_MODE (x)))
9149 case 1: size = "BYTE"; break;
9150 case 2: size = "WORD"; break;
9151 case 4: size = "DWORD"; break;
9152 case 8: size = "QWORD"; break;
9153 case 12: size = "XWORD"; break;
9155 if (GET_MODE (x) == XFmode)
9164 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9167 else if (code == 'w')
9169 else if (code == 'k')
9173 fputs (" PTR ", file);
9177 /* Avoid (%rip) for call operands. */
9178 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9179 && !CONST_INT_P (x))
9180 output_addr_const (file, x);
9181 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9182 output_operand_lossage ("invalid constraints for operand");
9187 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9192 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9193 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9195 if (ASSEMBLER_DIALECT == ASM_ATT)
9197 fprintf (file, "0x%08lx", l);
9200 /* These float cases don't actually occur as immediate operands. */
9201 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9205 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9206 fprintf (file, "%s", dstr);
9209 else if (GET_CODE (x) == CONST_DOUBLE
9210 && GET_MODE (x) == XFmode)
9214 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9215 fprintf (file, "%s", dstr);
9220 /* We have patterns that allow zero sets of memory, for instance.
9221 In 64-bit mode, we should probably support all 8-byte vectors,
9222 since we can in fact encode that into an immediate. */
9223 if (GET_CODE (x) == CONST_VECTOR)
9225 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9231 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9233 if (ASSEMBLER_DIALECT == ASM_ATT)
9236 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9237 || GET_CODE (x) == LABEL_REF)
9239 if (ASSEMBLER_DIALECT == ASM_ATT)
9242 fputs ("OFFSET FLAT:", file);
9245 if (CONST_INT_P (x))
9246 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9248 output_pic_addr_const (file, x, code);
9250 output_addr_const (file, x);
9254 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): interior lines missing (declarations, braces, dialect
   branches); code kept byte-identical.  */
9257 print_operand_address (FILE *file, rtx addr)
9259 struct ix86_address parts;
9260 rtx base, index, disp;
9262 int ok = ix86_decompose_address (addr, &parts);
9267 index = parts.index;
9269 scale = parts.scale;
/* Segment override prefix, if any.  */
9277 if (ASSEMBLER_DIALECT == ASM_ATT)
9279 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9285 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9286 if (TARGET_64BIT && !base && !index)
9290 if (GET_CODE (disp) == CONST
9291 && GET_CODE (XEXP (disp, 0)) == PLUS
9292 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9293 symbol = XEXP (XEXP (disp, 0), 0);
9295 if (GET_CODE (symbol) == LABEL_REF
9296 || (GET_CODE (symbol) == SYMBOL_REF
9297 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9300 if (!base && !index)
9302 /* Displacement only requires special attention. */
9304 if (CONST_INT_P (disp))
9306 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9307 fputs ("ds:", file);
9308 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9311 output_pic_addr_const (file, disp, 0);
9313 output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
9317 if (ASSEMBLER_DIALECT == ASM_ATT)
9322 output_pic_addr_const (file, disp, 0);
9323 else if (GET_CODE (disp) == LABEL_REF)
9324 output_asm_label (disp);
9326 output_addr_const (file, disp);
9331 print_reg (base, 0, file);
9335 print_reg (index, 0, file);
9337 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp].  */
9343 rtx offset = NULL_RTX;
9347 /* Pull out the offset of a symbol; print any symbol itself. */
9348 if (GET_CODE (disp) == CONST
9349 && GET_CODE (XEXP (disp, 0)) == PLUS
9350 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9352 offset = XEXP (XEXP (disp, 0), 1);
9353 disp = gen_rtx_CONST (VOIDmode,
9354 XEXP (XEXP (disp, 0), 0));
9358 output_pic_addr_const (file, disp, 0);
9359 else if (GET_CODE (disp) == LABEL_REF)
9360 output_asm_label (disp);
9361 else if (CONST_INT_P (disp))
9364 output_addr_const (file, disp);
9370 print_reg (base, 0, file);
9373 if (INTVAL (offset) >= 0)
9375 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9379 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9386 print_reg (index, 0, file);
9388 fprintf (file, "*%d", scale);
/* Handle machine-specific UNSPEC address constants for
   output_addr_const: emit the operand plus a TLS relocation suffix.
   NOTE(review): several case labels are missing from this extract;
   code kept byte-identical.  */
9396 output_addr_const_extra (FILE *file, rtx x)
9400 if (GET_CODE (x) != UNSPEC)
9403 op = XVECEXP (x, 0, 0);
9404 switch (XINT (x, 1))
9406 case UNSPEC_GOTTPOFF:
9407 output_addr_const (file, op);
9408 /* FIXME: This might be @TPOFF in Sun ld. */
9409 fputs ("@GOTTPOFF", file);
9412 output_addr_const (file, op);
9413 fputs ("@TPOFF", file);
9416 output_addr_const (file, op);
9418 fputs ("@TPOFF", file);
9420 fputs ("@NTPOFF", file);
9423 output_addr_const (file, op);
9424 fputs ("@DTPOFF", file);
9426 case UNSPEC_GOTNTPOFF:
9427 output_addr_const (file, op);
9429 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9430 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9432 fputs ("@GOTNTPOFF", file);
9434 case UNSPEC_INDNTPOFF:
9435 output_addr_const (file, op);
9436 fputs ("@INDNTPOFF", file);
9446 /* Split one or more DImode RTL references into pairs of SImode
9447 references. The RTL can be REG, offsettable MEM, integer constant, or
9448 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9449 split and "num" is its length. lo_half and hi_half are output arrays
9450 that parallel "operands". */
/* NOTE(review): loop header and MEM test lines are missing from this
   extract; code kept byte-identical.  */
9453 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9457 rtx op = operands[num];
9459 /* simplify_subreg refuse to split volatile memory addresses,
9460 but we still have to handle it. */
/* MEM: low word at offset 0, high word at offset 4.  */
9463 lo_half[num] = adjust_address (op, SImode, 0);
9464 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: use subregs; VOIDmode constants are treated as DImode.  */
9468 lo_half[num] = simplify_gen_subreg (SImode, op,
9469 GET_MODE (op) == VOIDmode
9470 ? DImode : GET_MODE (op), 0);
9471 hi_half[num] = simplify_gen_subreg (SImode, op,
9472 GET_MODE (op) == VOIDmode
9473 ? DImode : GET_MODE (op), 4);
9477 /* Split one or more TImode RTL references into pairs of DImode
9478 references. The RTL can be REG, offsettable MEM, integer constant, or
9479 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9480 split and "num" is its length. lo_half and hi_half are output arrays
9481 that parallel "operands". */
/* NOTE(review): as with split_di above, loop/MEM-test lines are
   missing; code kept byte-identical.  */
9484 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9488 rtx op = operands[num];
9490 /* simplify_subreg refuse to split volatile memory addresses, but we
9491 still have to handle it. */
/* MEM: low double-word at offset 0, high at offset 8.  */
9494 lo_half[num] = adjust_address (op, DImode, 0);
9495 hi_half[num] = adjust_address (op, DImode, 8);
9499 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9500 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9505 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9506 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9507 is the expression of the binary operation. The output may either be
9508 emitted here, or returned to the caller, like all output_* functions.
9510 There is no guarantee that the operands are the same mode, as they
9511 might be within FLOAT or FLOAT_EXTEND expressions. */
9513 #ifndef SYSV386_COMPAT
9514 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9515 wants to fix the assemblers because that causes incompatibility
9516 with gcc. No-one wants to fix gcc because that causes
9517 incompatibility with assemblers... You can use the option of
9518 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9519 #define SYSV386_COMPAT 1
/* NOTE(review): interior lines (opcode-name strcpy calls, some case
   labels and braces) are missing from this extract; kept
   byte-identical.  */
9523 output_387_binary_op (rtx insn, rtx *operands)
9525 static char buf[30];
9528 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9530 #ifdef ENABLE_CHECKING
9531 /* Even if we do not want to check the inputs, this documents input
9532 constraints. Which helps in understanding the following code. */
9533 if (STACK_REG_P (operands[0])
9534 && ((REG_P (operands[1])
9535 && REGNO (operands[0]) == REGNO (operands[1])
9536 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9537 || (REG_P (operands[2])
9538 && REGNO (operands[0]) == REGNO (operands[2])
9539 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9540 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9543 gcc_assert (is_sse);
/* Pick the base mnemonic from the operation code; integer-mode
   operands select the fi* (integer-operand) forms.  */
9546 switch (GET_CODE (operands[3]))
9549 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9550 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9558 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9559 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9567 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9568 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9576 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9577 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE scalar forms: ss for SFmode, sd otherwise.  */
9591 if (GET_MODE (operands[0]) == SFmode)
9592 strcat (buf, "ss\t{%2, %0|%0, %2}");
9594 strcat (buf, "sd\t{%2, %0|%0, %2}");
9599 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[2] is the "other" one.  */
9603 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9605 rtx temp = operands[2];
9606 operands[2] = operands[1];
9610 /* know operands[0] == operands[1]. */
9612 if (MEM_P (operands[2]))
9618 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9620 if (STACK_TOP_P (operands[0]))
9621 /* How is it that we are storing to a dead operand[2]?
9622 Well, presumably operands[1] is dead too. We can't
9623 store the result to st(0) as st(0) gets popped on this
9624 instruction. Instead store to operands[2] (which I
9625 think has to be st(1)). st(1) will be popped later.
9626 gcc <= 2.8.1 didn't have this check and generated
9627 assembly code that the Unixware assembler rejected. */
9628 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9630 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9634 if (STACK_TOP_P (operands[0]))
9635 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9637 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative MINUS/DIV cases.  */
9642 if (MEM_P (operands[1]))
9648 if (MEM_P (operands[2]))
9654 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9657 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9658 derived assemblers, confusingly reverse the direction of
9659 the operation for fsub{r} and fdiv{r} when the
9660 destination register is not st(0). The Intel assembler
9661 doesn't have this brain damage. Read !SYSV386_COMPAT to
9662 figure out what the hardware really does. */
9663 if (STACK_TOP_P (operands[0]))
9664 p = "{p\t%0, %2|rp\t%2, %0}";
9666 p = "{rp\t%2, %0|p\t%0, %2}";
9668 if (STACK_TOP_P (operands[0]))
9669 /* As above for fmul/fadd, we can't store to st(0). */
9670 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9672 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9677 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9680 if (STACK_TOP_P (operands[0]))
9681 p = "{rp\t%0, %1|p\t%1, %0}";
9683 p = "{p\t%1, %0|rp\t%0, %1}";
9685 if (STACK_TOP_P (operands[0]))
9686 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9688 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9693 if (STACK_TOP_P (operands[0]))
9695 if (STACK_TOP_P (operands[1]))
9696 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9698 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9701 else if (STACK_TOP_P (operands[1]))
9704 p = "{\t%1, %0|r\t%0, %1}";
9706 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9712 p = "{r\t%2, %0|\t%0, %2}";
9714 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9727 /* Return needed mode for entity in optimize_mode_switching pass. */
/* NOTE(review): the ENTITY parameter's use and several return
   statements are missing from this extract; kept byte-identical.  */
9730 ix86_mode_needed (int entity, rtx insn)
9732 enum attr_i387_cw mode;
9734 /* The mode UNINITIALIZED is used to store control word after a
9735 function call or ASM pattern. The mode ANY specify that function
9736 has no requirements on the control word and make no changes in the
9737 bits we are interested in. */
9740 || (NONJUMP_INSN_P (insn)
9741 && (asm_noperands (PATTERN (insn)) >= 0
9742 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9743 return I387_CW_UNINITIALIZED;
9745 if (recog_memoized (insn) < 0)
9748 mode = get_attr_i387_cw (insn);
9753 if (mode == I387_CW_TRUNC)
9758 if (mode == I387_CW_FLOOR)
9763 if (mode == I387_CW_CEIL)
9768 if (mode == I387_CW_MASK_PM)
9779 /* Output code to initialize control word copies used by trunc?f?i and
9780 rounding patterns. CURRENT_MODE is set to current control word,
9781 while NEW_MODE is set to new control word. */
/* NOTE(review): switch headers/case labels and the #else-style split
   between the two strategies are missing from this extract; kept
   byte-identical.  The x87 rounding-control field is CW bits 10-11
   (0x0c00 mask); 0x0020 masks the precision exception (PM bit).  */
9784 emit_i387_cw_initialization (int mode)
9786 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9789 enum ix86_stack_slot slot;
9791 rtx reg = gen_reg_rtx (HImode);
/* Save the current control word, then derive the new one in REG.  */
9793 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9794 emit_move_insn (reg, copy_rtx (stored_mode));
/* Strategy 1: plain and/or on the HImode copy.  */
9796 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9801 /* round toward zero (truncate) */
9802 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9803 slot = SLOT_CW_TRUNC;
9807 /* round down toward -oo */
9808 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9809 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9810 slot = SLOT_CW_FLOOR;
9814 /* round up toward +oo */
9815 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9816 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9817 slot = SLOT_CW_CEIL;
9820 case I387_CW_MASK_PM:
9821 /* mask precision exception for nearbyint() */
9822 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9823 slot = SLOT_CW_MASK_PM;
/* Strategy 2: insert the rounding bits with movsi_insv_1.  */
9835 /* round toward zero (truncate) */
9836 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9837 slot = SLOT_CW_TRUNC;
9841 /* round down toward -oo */
9842 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9843 slot = SLOT_CW_FLOOR;
9847 /* round up toward +oo */
9848 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9849 slot = SLOT_CW_CEIL;
9852 case I387_CW_MASK_PM:
9853 /* mask precision exception for nearbyint() */
9854 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9855 slot = SLOT_CW_MASK_PM;
/* Spill the derived control word to its dedicated stack slot.  */
9863 gcc_assert (slot < MAX_386_STACK_LOCALS);
9865 new_mode = assign_386_stack_local (HImode, slot);
9866 emit_move_insn (new_mode, reg);
9869 /* Output code for INSN to convert a float to a signed int. OPERANDS
9870 are the insn operands. The output may be [HSD]Imode and the input
9871 operand may be [SDX]Fmode. */
/* NOTE(review): sampled listing -- return type, braces and the final
   return are elided.  FISTTP (SSE3) truncates without needing a
   control-word swap; otherwise we bracket the store with fldcw.  */
9874 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9876 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9877 int dimode_p = GET_MODE (operands[0]) == DImode;
9878 int round_mode = get_attr_i387_cw (insn);
9880 /* Jump through a hoop or two for DImode, since the hardware has no
9881 non-popping instruction. We used to do this a different way, but
9882 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the popping fistp/fisttp leaves the value live.  */
9883 if ((dimode_p || fisttp) && !stack_top_dies)
9884 output_asm_insn ("fld\t%y1", operands);
9886 gcc_assert (STACK_TOP_P (operands[1]));
9887 gcc_assert (MEM_P (operands[0]));
9888 gcc_assert (GET_MODE (operands[1]) != TFmode);
9891 output_asm_insn ("fisttp%z0\t%0", operands);
/* Non-fisttp path: load the truncating control word (%3), store, then
   restore the original control word (%2).  */
9894 if (round_mode != I387_CW_ANY)
9895 output_asm_insn ("fldcw\t%3", operands);
9896 if (stack_top_dies || dimode_p)
9897 output_asm_insn ("fistp%z0\t%0", operands);
9899 output_asm_insn ("fist%z0\t%0", operands);
9900 if (round_mode != I387_CW_ANY)
9901 output_asm_insn ("fldcw\t%2", operands);
9907 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9908 have the values zero or one, indicates the ffreep insn's operand
9909 from the OPERANDS array. */
/* NOTE(review): sampled listing -- function header, #else/#endif and
   returns around the conditional block are elided.  */
9912 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9914 if (TARGET_USE_FFREEP)
9915 #if HAVE_AS_IX86_FFREEP
9916 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode.  ffreep %st(i) encodes
   as df c0+i; as a little-endian .word that is 0xcNdf.  */
9919 static char retval[] = ".word\t0xc_df";
9920 int regno = REGNO (operands[opno]);
9922 gcc_assert (FP_REGNO_P (regno));
/* Index 9 is the '_' placeholder in ".word\t0xc_df".  */
9924 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not wanted: a popping store to the same reg.  */
9929 return opno ? "fstp\t%y1" : "fstp\t%y0";
9933 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9934 should be used. UNORDERED_P is true when fucom should be used. */
/* NOTE(review): sampled listing -- braces, else-arms and the final
   return through the ALT table are elided.  */
9937 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9940 rtx cmp_op0, cmp_op1;
9941 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand layout differs between the eflags (fcomi-style) and
   fnstsw-style patterns; elided conditional selects between these.  */
9945 cmp_op0 = operands[0];
9946 cmp_op1 = operands[1];
9950 cmp_op0 = operands[1];
9951 cmp_op1 = operands[2];
/* SSE scalar compares: (u)comiss / (u)comisd set EFLAGS directly.  */
9956 if (GET_MODE (operands[0]) == SFmode)
9958 return "ucomiss\t{%1, %0|%0, %1}";
9960 return "comiss\t{%1, %0|%0, %1}";
9963 return "ucomisd\t{%1, %0|%0, %1}";
9965 return "comisd\t{%1, %0|%0, %1}";
/* x87 path: first operand must be at the top of the register stack.  */
9968 gcc_assert (STACK_TOP_P (cmp_op0));
9970 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero can use ftst, popping st(0) if it dies.  */
9972 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9976 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9977 return output_387_ffreep (operands, 1);
9980 return "ftst\n\tfnstsw\t%0";
9983 if (STACK_REG_P (cmp_op1)
9985 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9986 && REGNO (cmp_op1) != FIRST_STACK_REG)
9988 /* If both the top of the 387 stack dies, and the other operand
9989 is also a stack register that dies, then this must be a
9990 `fcompp' float compare */
9994 /* There is no double popping fcomi variant. Fortunately,
9995 eflags is immune from the fstp's cc clobbering. */
9997 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9999 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10000 return output_387_ffreep (operands, 0);
10005 return "fucompp\n\tfnstsw\t%0";
10007 return "fcompp\n\tfnstsw\t%0";
/* General case: index a template table by a 4-bit key built from the
   flag arguments and whether st(0) dies.  */
10012 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10014 static const char * const alt[16] =
10016 "fcom%z2\t%y2\n\tfnstsw\t%0",
10017 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10018 "fucom%z2\t%y2\n\tfnstsw\t%0",
10019 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10021 "ficom%z2\t%y2\n\tfnstsw\t%0",
10022 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10026 "fcomi\t{%y1, %0|%0, %y1}",
10027 "fcomip\t{%y1, %0|%0, %y1}",
10028 "fucomi\t{%y1, %0|%0, %y1}",
10029 "fucomip\t{%y1, %0|%0, %y1}",
/* Bit 3: eflags, bit 2: integer memory operand, bit 1: unordered,
   bit 0: st(0) dies (selects the popping variant).  */
10040 mask = eflags_p << 3;
10041 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10042 mask |= unordered_p << 1;
10043 mask |= stack_top_dies;
10045 gcc_assert (mask < 16);
/* Emit one element of a jump-table (addr_vec): a .long (or .quad on
   64-bit) referencing local label LPREFIX<value>.  NOTE(review):
   sampled listing -- the TARGET_64BIT conditional lines are elided.  */
10054 ix86_output_addr_vec_elt (FILE *file, int value)
10056 const char *directive = ASM_LONG;
10060 directive = ASM_QUAD;
10062 gcc_assert (!TARGET_64BIT);
10065 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC jump table as a difference of two labels
   (or a @GOTOFF / GOT-relative expression, depending on target).
   NOTE(review): sampled listing -- some conditional structure and the
   #ifdef braces are elided.  */
10069 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10071 const char *directive = ASM_LONG;
10074 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10075 directive = ASM_QUAD;
10077 gcc_assert (!TARGET_64BIT);
10079 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10080 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10081 fprintf (file, "%s%s%d-%s%d\n",
10082 directive, LPREFIX, value, LPREFIX, rel);
10083 else if (HAVE_AS_GOTOFF_IN_DATA)
10084 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10086 else if (TARGET_MACHO)
10088 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10089 machopic_output_function_base_name (file);
10090 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
10094 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10095 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10098 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* NOTE(review): sampled listing -- the rest of this comment, the
   function header and the final emit of TMP are elided.  */
10102 ix86_expand_clear (rtx dest)
10106 /* We play register width games, which are only valid after reload. */
10107 gcc_assert (reload_completed);
10109 /* Avoid HImode and its attendant prefix byte. */
/* Widen QI/HI destinations to SImode; the write still clears them.  */
10110 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10111 dest = gen_rtx_REG (SImode, REGNO (dest));
10112 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10114 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
/* The xor form clobbers EFLAGS, so attach an explicit clobber.  */
10115 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10117 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10118 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10124 /* X is an unchanging MEM. If it is a constant pool reference, return
10125 the constant pool rtx, else NULL. */
/* NOTE(review): function header line and the final NULL return are
   elided in this sampled listing.  */
10128 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping first so the SYMBOL_REF is visible.  */
10130 x = ix86_delegitimize_address (XEXP (x, 0));
10132 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10133 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS, dllimport and PIC references along the way.
   NOTE(review): sampled listing -- operand setup, several braces,
   early returns and #if TARGET_MACHO structure are elided.  */
10139 ix86_expand_move (enum machine_mode mode, rtx operands[])
10142 enum tls_model model;
/* TLS symbols must go through legitimize_tls_address.  */
10147 if (GET_CODE (op1) == SYMBOL_REF)
10149 model = SYMBOL_REF_TLS_MODEL (op1);
10152 op1 = legitimize_tls_address (op1, model, true);
10153 op1 = force_operand (op1, op0);
10157 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10158 && SYMBOL_REF_DLLIMPORT_P (op1))
10159 op1 = legitimize_dllimport_symbol (op1, false);
/* Same treatment for (const (plus symbol offset)) forms.  */
10161 else if (GET_CODE (op1) == CONST
10162 && GET_CODE (XEXP (op1, 0)) == PLUS
10163 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10165 rtx addend = XEXP (XEXP (op1, 0), 1);
10166 rtx symbol = XEXP (XEXP (op1, 0), 0);
10169 model = SYMBOL_REF_TLS_MODEL (symbol);
10171 tmp = legitimize_tls_address (symbol, model, true);
10172 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10173 && SYMBOL_REF_DLLIMPORT_P (symbol))
10174 tmp = legitimize_dllimport_symbol (symbol, true);
/* Re-attach the original offset to the legitimized symbol.  */
10178 tmp = force_operand (tmp, NULL);
10179 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10180 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic operands need to be routed through the GOT/PIC reg.  */
10186 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10188 if (TARGET_MACHO && !TARGET_64BIT)
/* Darwin 32-bit: use machopic indirection; reuse OP0 as scratch when
   it is a register (condition partially elided here).  */
10193 rtx temp = ((reload_in_progress
10194 || ((op0 && REG_P (op0))
10196 ? op0 : gen_reg_rtx (Pmode));
10197 op1 = machopic_indirect_data_reference (op1, temp);
10198 op1 = machopic_legitimize_pic_address (op1, mode,
10199 temp == op1 ? 0 : temp);
10201 else if (MACHOPIC_INDIRECT)
10202 op1 = machopic_indirect_data_reference (op1, 0);
10210 op1 = force_reg (Pmode, op1);
10211 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10213 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10214 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC: make sure a mem-to-mem or unpushable operand pair is
   broken up via a register (surrounding condition elided).  */
10223 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10224 || !push_operand (op0, mode))
10226 op1 = force_reg (mode, op1);
10228 if (push_operand (op0, mode)
10229 && ! general_no_elim_operand (op1, mode))
10230 op1 = copy_to_mode_reg (mode, op1);
10232 /* Force large constants in 64bit compilation into register
10233 to get them CSEed. */
10234 if (can_create_pseudo_p ()
10235 && (mode == DImode) && TARGET_64BIT
10236 && immediate_operand (op1, mode)
10237 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10238 && !register_operand (op0, mode)
10240 op1 = copy_to_mode_reg (mode, op1);
10242 if (can_create_pseudo_p ()
10243 && FLOAT_MODE_P (mode)
10244 && GET_CODE (op1) == CONST_DOUBLE)
10246 /* If we are loading a floating point constant to a register,
10247 force the value to memory now, since we'll get better code
10248 out the back end. */
10250 op1 = validize_mem (force_const_mem (mode, op1));
10251 if (!register_operand (op0, mode))
10253 rtx temp = gen_reg_rtx (mode);
10254 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10255 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move.  */
10261 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing awkward constants to memory and
   routing unaligned 32-bit TImode accesses through the misalign
   expander.  NOTE(review): sampled listing -- braces, the tmp[]
   declaration and early returns are elided.  */
10265 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10267 rtx op0 = operands[0], op1 = operands[1];
10268 unsigned int align = GET_MODE_ALIGNMENT (mode);
10270 /* Force constants other than zero into memory. We do not know how
10271 the instructions used to build constants modify the upper 64 bits
10272 of the register, once we have that information we may be able
10273 to handle some of them more efficiently. */
/* standard_sse_constant_p > 0 means the constant has a cheap
   materialization (e.g. pxor / all-ones), so leave those alone.  */
10274 if (can_create_pseudo_p ()
10275 && register_operand (op0, mode)
10276 && (CONSTANT_P (op1)
10277 || (GET_CODE (op1) == SUBREG
10278 && CONSTANT_P (SUBREG_REG (op1))))
10279 && standard_sse_constant_p (op1) <= 0)
10280 op1 = validize_mem (force_const_mem (mode, op1));
10282 /* TDmode values are passed as TImode on the stack. TImode values
10283 are moved via xmm registers, and moving them to stack can result in
10284 unaligned memory access. Use ix86_expand_vector_move_misalign()
10285 if memory operand is not aligned correctly. */
10286 if (can_create_pseudo_p ()
10287 && (mode == TImode) && !TARGET_64BIT
10288 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10289 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10293 /* ix86_expand_vector_move_misalign() does not like constants ... */
10294 if (CONSTANT_P (op1)
10295 || (GET_CODE (op1) == SUBREG
10296 && CONSTANT_P (SUBREG_REG (op1))))
10297 op1 = validize_mem (force_const_mem (mode, op1));
10299 /* ... nor both arguments in memory. */
10300 if (!register_operand (op0, mode)
10301 && !register_operand (op1, mode))
10302 op1 = force_reg (mode, op1);
10304 tmp[0] = op0; tmp[1] = op1;
10305 ix86_expand_vector_move_misalign (mode, tmp);
10309 /* Make operand1 a register if it isn't already. */
10310 if (can_create_pseudo_p ()
10311 && !register_operand (op0, mode)
10312 && !register_operand (op1, mode))
10314 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10318 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10321 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10322 straight to ix86_expand_vector_move. */
10323 /* Code generation for scalar reg-reg moves of single and double precision data:
10324 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10328 if (x86_sse_partial_reg_dependency == true)
10333 Code generation for scalar loads of double precision data:
10334 if (x86_sse_split_regs == true)
10335 movlpd mem, reg (gas syntax)
10339 Code generation for unaligned packed loads of single precision data
10340 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10341 if (x86_sse_unaligned_move_optimal)
10344 if (x86_sse_partial_reg_dependency == true)
10356 Code generation for unaligned packed loads of double precision data
10357 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10358 if (x86_sse_unaligned_move_optimal)
10361 if (x86_sse_split_regs == true)
/* NOTE(review): sampled listing -- locals, the optimize_size tests,
   several braces/returns and the MEM_P(op1) dispatch are elided.  */
10374 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10383 /* If we're optimizing for size, movups is the smallest. */
10386 op0 = gen_lowpart (V4SFmode, op0);
10387 op1 = gen_lowpart (V4SFmode, op1);
10388 emit_insn (gen_sse_movups (op0, op1));
10392 /* ??? If we have typed data, then it would appear that using
10393 movdqu is the only way to get unaligned data loaded with
10395 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10397 op0 = gen_lowpart (V16QImode, op0);
10398 op1 = gen_lowpart (V16QImode, op1);
10399 emit_insn (gen_sse2_movdqu (op0, op1));
/* Unaligned V2DF load paths.  */
10403 if (TARGET_SSE2 && mode == V2DFmode)
10407 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10409 op0 = gen_lowpart (V2DFmode, op0);
10410 op1 = gen_lowpart (V2DFmode, op1);
10411 emit_insn (gen_sse2_movupd (op0, op1));
10415 /* When SSE registers are split into halves, we can avoid
10416 writing to the top half twice. */
/* The CLOBBER tells the optimizers the old contents are dead, so
   only the low-half write creates a dependency.  */
10417 if (TARGET_SSE_SPLIT_REGS)
10419 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10424 /* ??? Not sure about the best option for the Intel chips.
10425 The following would seem to satisfy; the register is
10426 entirely cleared, breaking the dependency chain. We
10427 then store to the upper half, with a dependency depth
10428 of one. A rumor has it that Intel recommends two movsd
10429 followed by an unpacklpd, but this is unconfirmed. And
10430 given that the dependency depth of the unpacklpd would
10431 still be one, I'm not sure why this would be better. */
10432 zero = CONST0_RTX (V2DFmode);
/* Load the two halves separately: movlpd then movhpd.  */
10435 m = adjust_address (op1, DFmode, 0);
10436 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10437 m = adjust_address (op1, DFmode, 8);
10438 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* Unaligned single-precision / generic load paths.  */
10442 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10444 op0 = gen_lowpart (V4SFmode, op0);
10445 op1 = gen_lowpart (V4SFmode, op1);
10446 emit_insn (gen_sse_movups (op0, op1));
/* Otherwise break the load into movlps/movhps halves, pre-clearing
   or clobbering the destination per the tuning flags above.  */
10450 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10451 emit_move_insn (op0, CONST0_RTX (mode));
10453 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10455 if (mode != V4SFmode)
10456 op0 = gen_lowpart (V4SFmode, op0);
10457 m = adjust_address (op1, V2SFmode, 0);
10458 emit_insn (gen_sse_loadlps (op0, op0, m));
10459 m = adjust_address (op1, V2SFmode, 8);
10460 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Unaligned stores (destination in memory).  */
10463 else if (MEM_P (op0))
10465 /* If we're optimizing for size, movups is the smallest. */
10468 op0 = gen_lowpart (V4SFmode, op0);
10469 op1 = gen_lowpart (V4SFmode, op1);
10470 emit_insn (gen_sse_movups (op0, op1));
10474 /* ??? Similar to above, only less clear because of quote
10475 typeless stores unquote. */
10476 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10477 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10479 op0 = gen_lowpart (V16QImode, op0);
10480 op1 = gen_lowpart (V16QImode, op1);
10481 emit_insn (gen_sse2_movdqu (op0, op1));
10485 if (TARGET_SSE2 && mode == V2DFmode)
10487 m = adjust_address (op0, DFmode, 0);
10488 emit_insn (gen_sse2_storelpd (m, op1));
10489 m = adjust_address (op0, DFmode, 8);
10490 emit_insn (gen_sse2_storehpd (m, op1));
/* Split store via movlps/movhps for everything else.  */
10494 if (mode != V4SFmode)
10495 op1 = gen_lowpart (V4SFmode, op1);
10496 m = adjust_address (op0, V2SFmode, 0);
10497 emit_insn (gen_sse_storelps (m, op1));
10498 m = adjust_address (op0, V2SFmode, 8);
10499 emit_insn (gen_sse_storehps (m, op1));
10503 gcc_unreachable ();
10506 /* Expand a push in MODE. This is some mode for which we do not support
10507 proper push instructions, at least from the registers that we expect
10508 the value to live in. */
/* Strategy: decrement the stack pointer by the mode size, then store X
   at the new top of stack.  NOTE(review): function header line and
   braces are elided in this sampled listing.  */
10511 ix86_expand_push (enum machine_mode mode, rtx x)
10515 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10516 GEN_INT (-GET_MODE_SIZE (mode)),
10517 stack_pointer_rtx, 1, OPTAB_DIRECT);
/* expand_simple_binop may return a fresh pseudo; copy it back.  */
10518 if (tmp != stack_pointer_rtx)
10519 emit_move_insn (stack_pointer_rtx, tmp);
10521 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10522 emit_move_insn (tmp, x);
10525 /* Helper function of ix86_fixup_binary_operands to canonicalize
10526 operand order. Returns true if the operands should be swapped. */
/* NOTE(review): the return-value lines for each priority test are
   elided in this sampled listing.  */
10529 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10532 rtx dst = operands[0];
10533 rtx src1 = operands[1];
10534 rtx src2 = operands[2];
10536 /* If the operation is not commutative, we can't do anything. */
10537 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10540 /* Highest priority is that src1 should match dst. */
10541 if (rtx_equal_p (dst, src1))
10543 if (rtx_equal_p (dst, src2))
10546 /* Next highest priority is that immediate constants come second. */
10547 if (immediate_operand (src2, mode))
10549 if (immediate_operand (src1, mode))
10552 /* Lowest priority is that memory references should come second. */
10562 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10563 destination to use for the operation. If different from the true
10564 destination in operands[0], a copy operation will be required. */
/* NOTE(review): sampled listing -- the swap itself, some braces and the
   final return of DST are elided.  */
10567 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10570 rtx dst = operands[0];
10571 rtx src1 = operands[1];
10572 rtx src2 = operands[2];
10574 /* Canonicalize operand order. */
10575 if (ix86_swap_binary_operands_p (code, mode, operands))
10582 /* Both source operands cannot be in memory. */
10583 if (MEM_P (src1) && MEM_P (src2))
10585 /* Optimization: Only read from memory once. */
10586 if (rtx_equal_p (src1, src2))
10588 src2 = force_reg (mode, src2);
10592 src2 = force_reg (mode, src2);
10595 /* If the destination is memory, and we do not have matching source
10596 operands, do things in registers. */
10597 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10598 dst = gen_reg_rtx (mode);
10600 /* Source 1 cannot be a constant. */
10601 if (CONSTANT_P (src1))
10602 src1 = force_reg (mode, src1);
10604 /* Source 1 cannot be a non-matching memory. */
10605 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10606 src1 = force_reg (mode, src1);
/* Write the (possibly rewritten) sources back for the caller.  */
10608 operands[1] = src1;
10609 operands[2] = src2;
10613 /* Similarly, but assume that the destination has already been
10614 set up properly. */
/* Asserts that fixup did not need to substitute a fresh destination;
   callers rely on operands[0] being usable in place.  */
10617 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10618 enum machine_mode mode, rtx operands[])
10620 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10621 gcc_assert (dst == operands[0]);
10624 /* Attempt to expand a binary operator. Make the expansion closer to the
10625 actual machine, then just general_operand, which will allow 3 separate
10626 memory references (one output, two input) in a single insn. */
/* NOTE(review): sampled listing -- the emit of the bare SET in the
   reload_in_progress arm and some braces are elided.  */
10629 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10632 rtx src1, src2, dst, op, clob;
10634 dst = ix86_fixup_binary_operands (code, mode, operands);
10635 src1 = operands[1];
10636 src2 = operands[2];
10638 /* Emit the instruction. */
10640 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10641 if (reload_in_progress)
10643 /* Reload doesn't know about the flags register, and doesn't know that
10644 it doesn't want to clobber it. We can only do this with PLUS. */
10645 gcc_assert (code == PLUS);
/* Normal case: arithmetic on x86 clobbers EFLAGS, say so.  */
10650 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10651 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10654 /* Fix up the destination if needed. */
10655 if (dst != operands[0])
10656 emit_move_insn (operands[0], dst);
10659 /* Return TRUE or FALSE depending on whether the binary operator meets the
10660 appropriate constraints. */
/* Predicate mirror of ix86_fixup_binary_operands: accepts only operand
   arrangements the fixup would leave unchanged.  NOTE(review): the
   swap body and the return statements are elided in this listing.  */
10663 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10666 rtx dst = operands[0];
10667 rtx src1 = operands[1];
10668 rtx src2 = operands[2];
10670 /* Both source operands cannot be in memory. */
10671 if (MEM_P (src1) && MEM_P (src2))
10674 /* Canonicalize operand order for commutative operators. */
10675 if (ix86_swap_binary_operands_p (code, mode, operands))
10682 /* If the destination is memory, we must have a matching source operand. */
10683 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10686 /* Source 1 cannot be a constant. */
10687 if (CONSTANT_P (src1))
10690 /* Source 1 cannot be a non-matching memory. */
10691 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10697 /* Attempt to expand a unary operator. Make the expansion closer to the
10698 actual machine, then just general_operand, which will allow 2 separate
10699 memory references (one output, one input) in a single insn. */
/* NOTE(review): sampled listing -- operand setup, the MEM_P(dst) test
   around the matching_memory logic, and the no-clobber emit arm are
   elided.  */
10702 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10705 int matching_memory;
10706 rtx src, dst, op, clob;
10711 /* If the destination is memory, and we do not have matching source
10712 operands, do things in registers. */
10713 matching_memory = 0;
10716 if (rtx_equal_p (dst, src))
10717 matching_memory = 1;
10719 dst = gen_reg_rtx (mode);
10722 /* When source operand is memory, destination must match. */
10723 if (MEM_P (src) && !matching_memory)
10724 src = force_reg (mode, src);
10726 /* Emit the instruction. */
10728 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber flags on x86, so it may be emitted bare even
   during reload; the assert pins that down.  */
10729 if (reload_in_progress || code == NOT)
10731 /* Reload doesn't know about the flags register, and doesn't know that
10732 it doesn't want to clobber it. */
10733 gcc_assert (code == NOT);
10738 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10739 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10742 /* Fix up the destination if needed. */
10743 if (dst != operands[0])
10744 emit_move_insn (operands[0], dst);
10747 /* Return TRUE or FALSE depending on whether the unary operator meets the
10748 appropriate constraints. */
/* NOTE(review): the two return statements are elided in this listing.  */
10751 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10752 enum machine_mode mode ATTRIBUTE_UNUSED,
10753 rtx operands[2] ATTRIBUTE_UNUSED)
10755 /* If one of operands is memory, source and destination must match. */
10756 if ((MEM_P (operands[0])
10757 || MEM_P (operands[1]))
10758 && ! rtx_equal_p (operands[0], operands[1]))
10763 /* Post-reload splitter for converting an SF or DFmode value in an
10764 SSE register into an unsigned SImode. */
/* Algorithm: values >= 2**31 cannot be converted by the signed cvtt
   instruction, so subtract 2**31 from those lanes first, convert, then
   XOR the sign bit back in.  NOTE(review): sampled listing -- braces
   and the MEM_P(input) test around the two load paths are elided.  */
10767 ix86_split_convert_uns_si_sse (rtx operands[])
10769 enum machine_mode vecmode;
10770 rtx value, large, zero_or_two31, input, two31, x;
10772 large = operands[1];
10773 zero_or_two31 = operands[2];
10774 input = operands[3];
10775 two31 = operands[4];
10776 vecmode = GET_MODE (large);
10777 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10779 /* Load up the value into the low element. We must ensure that the other
10780 elements are valid floats -- zero is the easiest such value. */
10783 if (vecmode == V4SFmode)
10784 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10786 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Input already in a register: clear VALUE then move the scalar in.  */
10790 input = gen_rtx_REG (vecmode, REGNO (input));
10791 emit_move_insn (value, CONST0_RTX (vecmode));
10792 if (vecmode == V4SFmode)
10793 emit_insn (gen_sse_movss (value, value, input));
10795 emit_insn (gen_sse2_movsd (value, value, input));
10798 emit_move_insn (large, two31);
10799 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* LARGE := (2**31 <= VALUE) mask, all-ones per lane when true.  */
10801 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10802 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* ZERO_OR_TWO31 := 2**31 where the mask is set, else 0.  */
10804 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10805 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10807 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10808 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into just the integer sign bit (bit 31).  */
10810 large = gen_rtx_REG (V4SImode, REGNO (large));
10811 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10813 x = gen_rtx_REG (V4SImode, REGNO (value));
10814 if (vecmode == V4SFmode)
10815 emit_insn (gen_sse2_cvttps2dq (x, value));
10817 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* Re-add 2**31 to the lanes that had it subtracted.  */
10820 emit_insn (gen_xorv4si3 (value, value, large));
10823 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10824 Expects the 64-bit DImode to be supplied in a pair of integral
10825 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10826 -mfpmath=sse, !optimize_size only. */
/* Exponent-splicing trick: pair each 32-bit half with a biased-exponent
   word to form two exact doubles, subtract the biases, and add the
   halves.  NOTE(review): sampled listing -- some braces, the TARGET_SSE3
   test around the hadd path, and a local declaration are elided.  */
10829 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10831 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10832 rtx int_xmm, fp_xmm;
10833 rtx biases, exponents;
10836 int_xmm = gen_reg_rtx (V4SImode);
10837 if (TARGET_INTER_UNIT_MOVES)
10838 emit_insn (gen_movdi_to_sse (int_xmm, input));
10839 else if (TARGET_SSE_SPLIT_REGS)
/* The clobber documents that the old xmm contents are dead before the
   partial (DImode) write.  */
10841 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10842 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10846 x = gen_reg_rtx (V2DImode);
10847 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10848 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* 0x43300000 / 0x45300000 are the high words of 0x1.0p52 / 0x1.0p84.  */
10851 x = gen_rtx_CONST_VECTOR (V4SImode,
10852 gen_rtvec (4, GEN_INT (0x43300000UL),
10853 GEN_INT (0x45300000UL),
10854 const0_rtx, const0_rtx));
10855 exponents = validize_mem (force_const_mem (V4SImode, x));
10857 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10858 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10860 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10861 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10862 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10863 (0x1.0p84 + double(fp_value_hi_xmm)).
10864 Note these exponents differ by 32. */
10866 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10868 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10869 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10870 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10871 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10872 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10873 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10874 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10875 biases = validize_mem (force_const_mem (V2DFmode, biases));
10876 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10878 /* Add the upper and lower DFmode values together. */
/* SSE3 haddpd does it in one insn; otherwise unpack-high and add.  */
10880 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10883 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10884 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10885 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10888 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10891 /* Convert an unsigned SImode value into a DFmode. Only currently used
10892 for SSE, but applicable anywhere. */
/* Trick: x - 2**31 fits in signed SImode, convert that, then add the
   2**31 back as a double.  DFmode is wide enough to be exact.  */
10895 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10897 REAL_VALUE_TYPE TWO31r;
/* (-2147483647 - 1) spells INT_MIN without overflowing the literal.  */
10900 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10901 NULL, 1, OPTAB_DIRECT);
10903 fp = gen_reg_rtx (DFmode);
10904 emit_insn (gen_floatsidf2 (fp, x));
10906 real_ldexp (&TWO31r, &dconst1, 31);
10907 x = const_double_from_real_value (TWO31r, DFmode);
10909 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
/* expand_simple_binop may not target TARGET directly; elided guard
   presumably checks x != target -- confirm against full source.  */
10911 emit_move_insn (target, x);
10914 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10915 32-bit mode; otherwise we have a direct convert instruction. */
/* Split as hi * 2**32 + lo: the high half converts signed, the low half
   converts unsigned via ix86_expand_convert_uns_sidf_sse.  */
10918 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10920 REAL_VALUE_TYPE TWO32r;
10921 rtx fp_lo, fp_hi, x;
10923 fp_lo = gen_reg_rtx (DFmode);
10924 fp_hi = gen_reg_rtx (DFmode);
10926 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10928 real_ldexp (&TWO32r, &dconst1, 32);
10929 x = const_double_from_real_value (TWO32r, DFmode);
10930 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10932 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10934 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
/* Elided guard presumably avoids a redundant self-move -- confirm.  */
10937 emit_move_insn (target, x);
10940 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10941 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Split into 16-bit halves (both convert exactly to SFmode), then
   combine as hi * 2**16 + lo.  */
10943 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10945 REAL_VALUE_TYPE ONE16r;
10946 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10948 real_ldexp (&ONE16r, &dconst1, 16);
10949 x = const_double_from_real_value (ONE16r, SFmode);
10950 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10951 NULL, 0, OPTAB_DIRECT);
10952 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10953 NULL, 0, OPTAB_DIRECT);
10954 fp_hi = gen_reg_rtx (SFmode);
10955 fp_lo = gen_reg_rtx (SFmode);
10956 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10957 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10958 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10960 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
/* Copy only if the binop result did not land in TARGET already.  */
10962 if (!rtx_equal_p (target, fp_hi))
10963 emit_move_insn (target, fp_hi);
10966 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10967 then replicate the value for all elements of the vector
/* ...otherwise only the first element carries VALUE and the rest are
   zero.  NOTE(review): sampled listing -- the switch on MODE, case
   labels and some braces are elided.  */
10971 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* Integer element cases (SImode / DImode vectors).  */
10978 v = gen_rtvec (4, value, value, value, value);
10979 return gen_rtx_CONST_VECTOR (V4SImode, v);
10983 v = gen_rtvec (2, value, value);
10984 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* Float element cases honor VECT: replicate or pad with +0.0.  */
10988 v = gen_rtvec (4, value, value, value, value);
10990 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10991 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10992 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10996 v = gen_rtvec (2, value, value);
10998 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10999 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11002 gcc_unreachable ();
11006 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11007 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11008 for an SSE register. If VECT is true, then replicate the mask for
11009 all elements of the vector register. If INVERT is true, then create
11010 a mask excluding the sign bit. */
/* NOTE(review): sampled listing -- the switch on MODE, case labels,
   the imode assignments and some declarations are elided.  */
11013 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11015 enum machine_mode vec_mode, imode;
11016 HOST_WIDE_INT hi, lo;
11021 /* Find the sign bit, sign extended to 2*HWI. */
/* 32-bit elements: sign bit is 0x80000000; HI extends the sign.  */
11027 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11028 lo = 0x80000000, hi = lo < 0;
/* 64-bit elements: build the bit from SHIFT, split across lo/hi when
   HOST_WIDE_INT is only 32 bits wide.  */
11034 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11035 if (HOST_BITS_PER_WIDE_INT >= 64)
11036 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11038 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* Scalar (non-vector) case, e.g. TFmode: no vector mode applies.  */
11044 vec_mode = VOIDmode;
11045 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11046 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11050 gcc_unreachable ();
/* INVERT yields the complement: everything BUT the sign bit.  */
11054 lo = ~lo, hi = ~hi;
11056 /* Force this value into the low part of a fp vector constant. */
11057 mask = immed_double_const (lo, hi, imode);
11058 mask = gen_lowpart (mode, mask);
11060 if (vec_mode == VOIDmode)
11061 return force_reg (mode, mask);
11063 v = ix86_build_const_vector (mode, vect, mask);
11064 return force_reg (vec_mode, v);
11067 /* Generate code for floating point ABS or NEG. */
/* NOTE(review): sampled listing -- the operand/dst/src setup, the x87
   arm of the use_sse test and the final moves are elided.  */
11070 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11073 rtx mask, set, use, clob, dst, src;
11074 bool use_sse = false;
11075 bool vector_mode = VECTOR_MODE_P (mode);
11076 enum machine_mode elt_mode = mode;
11080 elt_mode = GET_MODE_INNER (mode);
11083 else if (mode == TFmode)
11085 else if (TARGET_SSE_MATH)
11086 use_sse = SSE_FLOAT_MODE_P (mode);
11088 /* NEG and ABS performed with SSE use bitwise mask operations.
11089 Create the appropriate mask now. */
/* NEG = XOR with sign-bit mask; ABS = AND with everything-but-sign
   mask (hence invert = (code == ABS)).  */
11091 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11100 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11101 set = gen_rtx_SET (VOIDmode, dst, set);
/* x87 path: emit the abstract NEG/ABS with a USE of the mask and a
   flags clobber so later splitters can choose an implementation.  */
11106 set = gen_rtx_fmt_e (code, mode, src);
11107 set = gen_rtx_SET (VOIDmode, dst, set);
11110 use = gen_rtx_USE (VOIDmode, mask);
11111 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11112 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11113 gen_rtvec (3, set, use, clob)));
11120 /* Expand a copysign operation. Special case operand 0 being a constant. */
11123 ix86_expand_copysign (rtx operands[])
11125 enum machine_mode mode, vmode;
11126 rtx dest, op0, op1, mask, nmask;
11128 dest = operands[0];
11132 mode = GET_MODE (dest);
11133 vmode = mode == SFmode ? V4SFmode : V2DFmode;
/* Constant magnitude operand: strip its sign bit up front, then use the
   *_const copysign pattern (dest, |op0|, op1, mask).  */
11135 if (GET_CODE (op0) == CONST_DOUBLE)
11137 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11139 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11140 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11142 if (mode == SFmode || mode == DFmode)
11144 if (op0 == CONST0_RTX (mode))
11145 op0 = CONST0_RTX (vmode)
/* Non-zero constant: embed it as the first element of a vector constant,
   padding the remaining lanes with zero.  */
11150 if (mode == SFmode)
11151 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11152 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11154 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11155 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11159 mask = ix86_build_signbit_mask (mode, 0, 0);
11161 if (mode == SFmode)
11162 copysign_insn = gen_copysignsf3_const;
11163 else if (mode == DFmode)
11164 copysign_insn = gen_copysigndf3_const;
11166 copysign_insn = gen_copysigntf3_const;
11168 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign-bit mask and its complement
   so the splitter can combine the two operands bitwise.  */
11172 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11174 nmask = ix86_build_signbit_mask (mode, 0, 1);
11175 mask = ix86_build_signbit_mask (mode, 0, 0);
11177 if (mode == SFmode)
11178 copysign_insn = gen_copysignsf3_var;
11179 else if (mode == DFmode)
11180 copysign_insn = gen_copysigndf3_var;
11182 copysign_insn = gen_copysigntf3_var;
11184 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11188 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11189 be a constant, and so has already been expanded into a vector constant. */
11192 ix86_split_copysign_const (rtx operands[])
11194 enum machine_mode mode, vmode;
11195 rtx dest, op0, op1, mask, x;
11197 dest = operands[0];
11200 mask = operands[3];
11202 mode = GET_MODE (dest);
11203 vmode = GET_MODE (mask);
/* dest = (op1 & signbit-mask): keep only the sign of op1.  */
11205 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11206 x = gen_rtx_AND (vmode, dest, mask);
11207 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* dest |= |op0|: merge in the constant magnitude, unless it is zero
   (in which case the AND result already is the answer).  */
11209 if (op0 != CONST0_RTX (vmode))
11211 x = gen_rtx_IOR (vmode, dest, op0);
11212 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11216 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11217 so we have to do two masks. */
11220 ix86_split_copysign_var (rtx operands[])
11222 enum machine_mode mode, vmode;
11223 rtx dest, scratch, op0, op1, mask, nmask, x;
11225 dest = operands[0];
11226 scratch = operands[1];
11229 nmask = operands[4];
11230 mask = operands[5];
11232 mode = GET_MODE (dest);
11233 vmode = GET_MODE (mask);
11235 if (rtx_equal_p (op0, op1))
11237 /* Shouldn't happen often (it's useless, obviously), but when it does
11238 we'd generate incorrect code if we continue below. */
11239 emit_move_insn (dest, op0);
/* The remaining code implements dest = (op0 & ~mask) | (op1 & mask),
   choosing operand orderings by which hard registers coincide (see the
   "alternative N" comments matching the insn pattern's constraints).  */
11243 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11245 gcc_assert (REGNO (op1) == REGNO (scratch));
11247 x = gen_rtx_AND (vmode, scratch, mask);
11248 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11251 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11252 x = gen_rtx_NOT (vmode, dest);
11253 x = gen_rtx_AND (vmode, x, op0);
11254 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11258 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11260 x = gen_rtx_AND (vmode, scratch, mask);
11262 else /* alternative 2,4 */
11264 gcc_assert (REGNO (mask) == REGNO (scratch));
11265 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11266 x = gen_rtx_AND (vmode, scratch, op1);
11268 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11270 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11272 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11273 x = gen_rtx_AND (vmode, dest, nmask);
11275 else /* alternative 3,4 */
11277 gcc_assert (REGNO (nmask) == REGNO (dest));
11279 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11280 x = gen_rtx_AND (vmode, dest, op0);
11282 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine the two halves: dest |= scratch.  */
11285 x = gen_rtx_IOR (vmode, dest, scratch);
11286 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11289 /* Return TRUE or FALSE depending on whether the first SET in INSN
11290 has source and destination with matching CC modes, and that the
11291 CC mode is at least as constrained as REQ_MODE. */
11294 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11297 enum machine_mode set_mode;
11299 set = PATTERN (insn);
11300 if (GET_CODE (set) == PARALLEL)
11301 set = XVECEXP (set, 0, 0)
11302 gcc_assert (GET_CODE (set) == SET);
11303 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11305 set_mode = GET_MODE (SET_DEST (set));
/* NOTE(review): the following checks are arms of a switch on set_mode
   (labels elided in this view); each accepts REQ_MODEs that demand no
   more flags than set_mode guarantees -- confirm against full source.  */
11309 if (req_mode != CCNOmode
11310 && (req_mode != CCmode
11311 || XEXP (SET_SRC (set), 1) != const0_rtx))
11315 if (req_mode == CCGCmode)
11319 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11323 if (req_mode == CCZmode)
11330 gcc_unreachable ();
/* Finally the COMPARE itself must carry the destination's CC mode.  */
11333 return (GET_MODE (SET_SRC (set)) == set_mode);
11336 /* Generate insn patterns to do an integer compare of OPERANDS. */
11339 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11341 enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still decides CODE.  */
11344 cmpmode = SELECT_CC_MODE (code, op0, op1);
11345 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11347 /* This is very simple, but making the interface the same as in the
11348 FP case makes the rest of the code easier. */
11349 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11350 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11352 /* Return the test that should be put into the flags user, i.e.
11353 the bcc, scc, or cmov instruction. */
11354 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11357 /* Figure out whether to use ordered or unordered fp comparisons.
11358 Return the appropriate mode to use. */
11361 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11363 /* ??? In order to make all comparisons reversible, we do all comparisons
11364 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11365 all forms trapping and nontrapping comparisons, we can make inequality
11366 comparisons trapping again, since it results in better code when using
11367 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping on NaN); CCFPmode = ordered.  */
11368 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Return the CC mode that a comparison CODE of OP0 with OP1 needs,
   i.e. the weakest mode whose flags still decide the comparison.  */
11372 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11374 enum machine_mode mode = GET_MODE (op0);
11376 if (SCALAR_FLOAT_MODE_P (mode))
11378 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11379 return ix86_fp_compare_mode (code);
11384 /* Only zero flag is needed. */
11385 case EQ: /* ZF=0 */
11386 case NE: /* ZF!=0 */
11388 /* Codes needing carry flag. */
11389 case GEU: /* CF=0 */
11390 case LTU: /* CF=1 */
11391 /* Detect overflow checks. They need just the carry flag. */
11392 if (GET_CODE (op0) == PLUS
11393 && rtx_equal_p (op1, XEXP (op0, 0)))
11397 case GTU: /* CF=0 & ZF=0 */
11398 case LEU: /* CF=1 | ZF=1 */
11399 /* Detect overflow checks. They need just the carry flag. */
11400 if (GET_CODE (op0) == MINUS
11401 && rtx_equal_p (op1, XEXP (op0, 0)))
11405 /* Codes possibly doable only with sign flag when
11406 comparing against zero. */
11407 case GE: /* SF=OF or SF=0 */
11408 case LT: /* SF<>OF or SF=1 */
11409 if (op1 == const0_rtx)
11412 /* For other cases Carry flag is not required. */
11414 /* Codes doable only with sign flag when comparing
11415 against zero, but we miss jump instruction for it
11416 so we need to use relational tests against overflow
11417 that thus needs to be zero. */
11418 case GT: /* ZF=0 & SF=OF */
11419 case LE: /* ZF=1 | SF<>OF */
11420 if (op1 == const0_rtx)
11424 /* strcmp pattern do (use flags) and combine may ask us for proper
11429 gcc_unreachable ();
11433 /* Return the fixed registers used for condition codes. */
11436 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
/* NOTE(review): body sets *p1/*p2 to the condition-code hard register
   numbers (FLAGS_REG and presumably FPSR_REG or INVALID_REGNUM) --
   confirm against the elided body.  */
11443 /* If two condition code modes are compatible, return a condition code
11444 mode which is compatible with both. Otherwise, return
11447 static enum machine_mode
11448 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11453 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGCmode and CCGOCmode are mutually compatible.  NOTE(review): the
   mode returned for this pair is in elided code -- confirm which one.  */
11456 if ((m1 == CCGCmode && m2 == CCGOCmode)
11457 || (m1 == CCGOCmode && m2 == CCGCmode))
11463 gcc_unreachable ();
11493 /* These are only compatible with themselves, which we already
11499 /* Split comparison code CODE into comparisons we can do using branch
11500 instructions. BYPASS_CODE is comparison code for branch that will
11501 branch around FIRST_CODE and SECOND_CODE. If some of branches
11502 is not required, set value to UNKNOWN.
11503 We never require more than two branches. */
11506 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11507 enum rtx_code *first_code,
11508 enum rtx_code *second_code)
11510 *first_code = code;
11511 *bypass_code = UNKNOWN;
11512 *second_code = UNKNOWN;
11514 /* The fcomi comparison sets flags as follows:
/* These codes map directly onto one unsigned flags test each.  */
11524 case GT: /* GTU - CF=0 & ZF=0 */
11525 case GE: /* GEU - CF=0 */
11526 case ORDERED: /* PF=0 */
11527 case UNORDERED: /* PF=1 */
11528 case UNEQ: /* EQ - ZF=1 */
11529 case UNLT: /* LTU - CF=1 */
11530 case UNLE: /* LEU - CF=1 | ZF=1 */
11531 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that misbehave on NaN get a bypass-on-unordered branch
   (jump around the real test) or a second branch, per IEEE semantics.  */
11533 case LT: /* LTU - CF=1 - fails on unordered */
11534 *first_code = UNLT;
11535 *bypass_code = UNORDERED;
11537 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11538 *first_code = UNLE;
11539 *bypass_code = UNORDERED;
11541 case EQ: /* EQ - ZF=1 - fails on unordered */
11542 *first_code = UNEQ;
11543 *bypass_code = UNORDERED;
11545 case NE: /* NE - ZF=0 - fails on unordered */
11546 *first_code = LTGT;
11547 *second_code = UNORDERED;
11549 case UNGE: /* GEU - CF=0 - fails on unordered */
11551 *second_code = UNORDERED;
11553 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11555 *second_code = UNORDERED;
11558 gcc_unreachable ();
/* Without strict IEEE conformance a single branch always suffices.  */
11560 if (!TARGET_IEEE_FP)
11562 *second_code = UNKNOWN;
11563 *bypass_code = UNKNOWN;
11567 /* Return cost of comparison done fcom + arithmetics operations on AX.
11568 All following functions do use number of instructions as a cost metrics.
11569 In future this should be tweaked to compute bytes for optimize_size and
11570 take into account performance of various instructions on various CPUs. */
11572 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* NOTE(review): presumably returns the cheap fixed cost here when IEEE
   conformance is off -- the return is in elided code; confirm.  */
11574 if (!TARGET_IEEE_FP)
11576 /* The cost of code output by ix86_expand_fp_compare. */
11600 gcc_unreachable ();
11604 /* Return cost of comparison done using fcomi operation.
11605 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11607 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11609 enum rtx_code bypass_code, first_code, second_code;
11610 /* Return arbitrarily high cost when instruction is not supported - this
11611 prevents gcc from using it. */
11614 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fcomi + one branch = 2; add 1 if a bypass or second branch is needed.  */
11615 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11618 /* Return cost of comparison done using sahf operation.
11619 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11621 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11623 enum rtx_code bypass_code, first_code, second_code;
11624 /* Return arbitrarily high cost when instruction is not preferred - this
11625 avoids gcc from using it. */
11626 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11628 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fnstsw + sahf + branch = 3; add 1 for an extra bypass/second branch.  */
11629 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11632 /* Compute cost of the comparison done using any method.
11633 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11635 ix86_fp_comparison_cost (enum rtx_code code)
11637 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11640 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11641 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies' costs.  */
11643 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11644 if (min > sahf_cost)
11646 if (min > fcomi_cost)
11651 /* Return true if we should use an FCOMI instruction for this
11655 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11657 enum rtx_code swapped_code = swap_condition (code);
/* fcomi wins if it is the cheapest strategy for either operand order
   (the caller may still swap the operands).  */
11659 return ((ix86_fp_comparison_cost (code)
11660 == ix86_fp_comparison_fcomi_cost (code))
11661 || (ix86_fp_comparison_cost (swapped_code)
11662 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11665 /* Swap, force into registers, or otherwise massage the two operands
11666 to a fp comparison. The operands are updated in place; the new
11667 comparison code is returned. */
11669 static enum rtx_code
11670 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11672 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11673 rtx op0 = *pop0, op1 = *pop1;
11674 enum machine_mode op_mode = GET_MODE (op0);
11675 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11677 /* All of the unordered compare instructions only work on registers.
11678 The same is true of the fcomi compare instructions. The XFmode
11679 compare instructions require registers except when comparing
11680 against zero or when converting operand 1 from fixed point to
11684 && (fpcmp_mode == CCFPUmode
11685 || (op_mode == XFmode
11686 && ! (standard_80387_constant_p (op0) == 1
11687 || standard_80387_constant_p (op1) == 1)
11688 && GET_CODE (op1) != FLOAT)
11689 || ix86_use_fcomi_compare (code)))
11691 op0 = force_reg (op_mode, op0);
11692 op1 = force_reg (op_mode, op1);
11696 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11697 things around if they appear profitable, otherwise force op0
11698 into a register. */
11700 if (standard_80387_constant_p (op0) == 0
11702 && ! (standard_80387_constant_p (op1) == 0
11706 tmp = op0, op0 = op1, op1 = tmp;
11707 code = swap_condition (code);
11711 op0 = force_reg (op_mode, op0);
11713 if (CONSTANT_P (op1))
/* standard_80387_constant_p > 0 means op1 can be loaded with fld1/fldz
   etc.; otherwise spill the constant to memory.  */
11715 int tmp = standard_80387_constant_p (op1);
11717 op1 = validize_mem (force_const_mem (op_mode, op1));
11721 op1 = force_reg (op_mode, op1);
11724 op1 = force_reg (op_mode, op1);
11728 /* Try to rearrange the comparison to make it cheaper. */
11729 if (ix86_fp_comparison_cost (code)
11730 > ix86_fp_comparison_cost (swap_condition (code))
11731 && (REG_P (op1) || can_create_pseudo_p ()))
11734 tmp = op0, op0 = op1, op1 = tmp;
11735 code = swap_condition (code);
11737 op0 = force_reg (op_mode, op0);
11745 /* Convert comparison codes we use to represent FP comparison to integer
11746 code that will result in proper branch. Return UNKNOWN if no such code
11750 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* NOTE(review): the mapping table (e.g. UNLT -> LTU) is in elided code;
   confirm against full source before relying on specifics.  */
11779 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11782 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11783 rtx *second_test, rtx *bypass_test)
11785 enum machine_mode fpcmp_mode, intcmp_mode;
11787 int cost = ix86_fp_comparison_cost (code);
11788 enum rtx_code bypass_code, first_code, second_code;
11790 fpcmp_mode = ix86_fp_compare_mode (code);
11791 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11794 *second_test = NULL_RTX;
11796 *bypass_test = NULL_RTX;
11798 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11800 /* Do fcomi/sahf based test when profitable. */
11801 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11802 && (bypass_code == UNKNOWN || bypass_test)
11803 && (second_code == UNKNOWN || second_test))
11805 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11806 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf variant: fnstsw into a scratch HImode reg, then sahf.  */
11812 gcc_assert (TARGET_SAHF);
11815 scratch = gen_reg_rtx (HImode);
11816 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11818 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11821 /* The FP codes work out to act like unsigned. */
11822 intcmp_mode = fpcmp_mode;
11824 if (bypass_code != UNKNOWN)
11825 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11826 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11828 if (second_code != UNKNOWN)
11829 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11830 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11835 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11836 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11837 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11839 scratch = gen_reg_rtx (HImode);
11840 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2))
11842 /* In the unordered case, we have to check C2 for NaN's, which
11843 doesn't happen to work out to anything nice combination-wise.
11844 So do some bit twiddling on the value we've got in AH to come
11845 up with an appropriate set of condition codes. */
/* Mask constants below select FNSTSW status bits as they land in AH:
   C0 = 0x01, C2 = 0x04, C3 = 0x40 (per the x87 FNSTSW layout).  */
11847 intcmp_mode = CCNOmode;
11852 if (code == GT || !TARGET_IEEE_FP)
11854 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11859 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11860 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11861 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11862 intcmp_mode = CCmode;
11868 if (code == LT && TARGET_IEEE_FP)
11870 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11871 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11872 intcmp_mode = CCmode;
11877 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11883 if (code == GE || !TARGET_IEEE_FP)
11885 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11890 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11891 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11898 if (code == LE && TARGET_IEEE_FP)
11900 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11901 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11902 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11903 intcmp_mode = CCmode;
11908 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11914 if (code == EQ && TARGET_IEEE_FP)
11916 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11917 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11918 intcmp_mode = CCmode;
11923 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11930 if (code == NE && TARGET_IEEE_FP)
11932 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11933 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11939 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11945 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11949 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11954 gcc_unreachable ();
11958 /* Return the test that should be put into the flags user, i.e.
11959 the bcc, scc, or cmov instruction. */
11960 return gen_rtx_fmt_ee (code, VOIDmode,
11961 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the globals ix86_compare_op0/op1 (or reuse an
   already-emitted flags result) and return the flags-user test rtx.  */
11966 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11969 op0 = ix86_compare_op0;
11970 op1 = ix86_compare_op1;
11973 *second_test = NULL_RTX;
11975 *bypass_test = NULL_RTX;
/* A compare insn was already emitted; consume it (one-shot global).  */
11977 if (ix86_compare_emitted)
11979 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11980 ix86_compare_emitted = NULL_RTX;
11982 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11984 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11985 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11986 second_test, bypass_test);
11989 ret = ix86_expand_int_compare (code, op0, op1);
11994 /* Return true if the CODE will result in nontrivial jump sequence. */
11996 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11998 enum rtx_code bypass_code, first_code, second_code;
12001 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Nontrivial == needs a bypass branch or a second branch.  */
12002 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL for comparison CODE of the globals
   ix86_compare_op0/op1, dispatching on operand mode.  */
12006 ix86_expand_branch (enum rtx_code code, rtx label)
12010 /* If we have emitted a compare insn, go straight to simple.
12011 ix86_expand_compare won't emit anything if ix86_compare_emitted
12013 if (ix86_compare_emitted)
12016 switch (GET_MODE (ix86_compare_op0))
/* Simple integer case: one compare, one jump.  */
12022 tmp = ix86_expand_compare (code, NULL, NULL);
12023 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12024 gen_rtx_LABEL_REF (VOIDmode, label),
12026 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point case.  */
12035 enum rtx_code bypass_code, first_code, second_code;
12037 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12038 &ix86_compare_op1);
12040 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12042 /* Check whether we will use the natural sequence with one jump. If
12043 so, we can expand jump early. Otherwise delay expansion by
12044 creating compound insn to not confuse optimizers. */
12045 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12047 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12048 gen_rtx_LABEL_REF (VOIDmode, label),
12049 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-branch sequence: emit one compound jump insn (with FPSR/flags
   clobbers, plus an HImode scratch when not using fcomi) that a later
   split pass expands.  */
12053 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12054 ix86_compare_op0, ix86_compare_op1);
12055 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12056 gen_rtx_LABEL_REF (VOIDmode, label),
12058 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12060 use_fcomi = ix86_use_fcomi_compare (code);
12061 vec = rtvec_alloc (3 + !use_fcomi);
12062 RTVEC_ELT (vec, 0) = tmp;
12064 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12066 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12069 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12071 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12080 /* Expand DImode branch into multiple compare+branch. */
12082 rtx lo[2], hi[2], label2;
12083 enum rtx_code code1, code2, code3;
12084 enum machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
12086 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12088 tmp = ix86_compare_op0;
12089 ix86_compare_op0 = ix86_compare_op1;
12090 ix86_compare_op1 = tmp;
12091 code = swap_condition (code);
12093 if (GET_MODE (ix86_compare_op0) == DImode)
12095 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12096 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12101 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12102 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12106 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12107 avoid two branches. This costs one extra insn, so disable when
12108 optimizing for size. */
12110 if ((code == EQ || code == NE)
12112 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12117 if (hi[1] != const0_rtx)
12118 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12119 NULL_RTX, 0, OPTAB_WIDEN);
12122 if (lo[1] != const0_rtx)
12123 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12124 NULL_RTX, 0, OPTAB_WIDEN);
12126 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12127 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the word-sized OR-of-XORs against zero.  */
12129 ix86_compare_op0 = tmp;
12130 ix86_compare_op1 = const0_rtx;
12131 ix86_expand_branch (code, label);
12135 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12136 op1 is a constant and the low word is zero, then we can just
12137 examine the high word. Similarly for low word -1 and
12138 less-or-equal-than or greater-than. */
12140 if (CONST_INT_P (hi[1]))
12143 case LT: case LTU: case GE: case GEU:
12144 if (lo[1] == const0_rtx)
12146 ix86_compare_op0 = hi[0];
12147 ix86_compare_op1 = hi[1];
12148 ix86_expand_branch (code, label);
12151 case LE: case LEU: case GT: case GTU:
12152 if (lo[1] == constm1_rtx)
12154 ix86_compare_op0 = hi[0];
12155 ix86_compare_op1 = hi[1];
12156 ix86_expand_branch (code, label);
12163 /* Otherwise, we need two or three jumps. */
12165 label2 = gen_label_rtx ();
12168 code2 = swap_condition (code);
12169 code3 = unsigned_condition (code);
12173 case LT: case GT: case LTU: case GTU:
12176 case LE: code1 = LT; code2 = GT; break;
12177 case GE: code1 = GT; code2 = LT; break;
12178 case LEU: code1 = LTU; code2 = GTU; break;
12179 case GEU: code1 = GTU; code2 = LTU; break;
12181 case EQ: code1 = UNKNOWN; code2 = NE; break;
12182 case NE: code2 = UNKNOWN; break;
12185 gcc_unreachable ();
12190 * if (hi(a) < hi(b)) goto true;
12191 * if (hi(a) > hi(b)) goto false;
12192 * if (lo(a) < lo(b)) goto true;
12196 ix86_compare_op0 = hi[0];
12197 ix86_compare_op1 = hi[1];
12199 if (code1 != UNKNOWN)
12200 ix86_expand_branch (code1, label);
12201 if (code2 != UNKNOWN)
12202 ix86_expand_branch (code2, label2);
/* Low words decide; unsigned compare regardless of CODE's sign.  */
12204 ix86_compare_op0 = lo[0];
12205 ix86_compare_op1 = lo[1];
12206 ix86_expand_branch (code3, label);
12208 if (code2 != UNKNOWN)
12209 emit_label (label2);
12214 gcc_unreachable ();
12218 /* Split branch based on floating point condition. */
12220 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12221 rtx target1, rtx target2, rtx tmp, rtx pushed)
12223 rtx second, bypass;
12224 rtx label = NULL_RTX;
12226 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the taken target is TARGET1.  */
12229 if (target2 != pc_rtx)
12232 code = reverse_condition_maybe_unordered (code);
12237 condition = ix86_expand_fp_compare (code, op1, op2,
12238 tmp, &second, &bypass);
12240 /* Remove pushed operand from stack. */
12242 ix86_free_from_memory (GET_MODE (pushed));
12244 if (split_branch_probability >= 0)
12246 /* Distribute the probabilities across the jumps.
12247 Assume the BYPASS and SECOND to be always test
12249 probability = split_branch_probability;
12251 /* Value of 1 is low enough to make no need for probability
12252 to be updated. Later we may run some experiments and see
12253 if unordered values are more frequent in practice. */
12255 bypass_probability = 1;
12257 second_probability = 1;
/* Bypass branch jumps over the main test (e.g. unordered case).
   Each emitted jump gets a REG_BR_PROB note when probabilities are
   known.  */
12259 if (bypass != NULL_RTX)
12261 label = gen_label_rtx ();
12262 i = emit_jump_insn (gen_rtx_SET
12264 gen_rtx_IF_THEN_ELSE (VOIDmode,
12266 gen_rtx_LABEL_REF (VOIDmode,
12269 if (bypass_probability >= 0)
12271 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12272 GEN_INT (bypass_probability),
12275 i = emit_jump_insn (gen_rtx_SET
12277 gen_rtx_IF_THEN_ELSE (VOIDmode,
12278 condition, target1, target2)));
12279 if (probability >= 0)
12281 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12282 GEN_INT (probability),
12284 if (second != NULL_RTX)
12286 i = emit_jump_insn (gen_rtx_SET
12288 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12290 if (second_probability >= 0)
12292 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12293 GEN_INT (second_probability),
12296 if (label != NULL_RTX)
12297 emit_label (label);
/* Expand a setcc of the globals ix86_compare_op0/op1 into DEST (QImode).
   Returns 1 on success (DONE) and 0 on failure (FAIL) for the caller's
   expander.  */
12301 ix86_expand_setcc (enum rtx_code code, rtx dest)
12303 rtx ret, tmp, tmpreg, equiv;
12304 rtx second_test, bypass_test;
/* Double-word compares go through the branch path instead.  */
12306 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12307 return 0; /* FAIL */
12309 gcc_assert (GET_MODE (dest) == QImode);
12311 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12312 PUT_MODE (ret, QImode);
12317 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* Fold in the auxiliary FP tests: a second test is ORed in, a bypass
   test is reversed and ANDed in.  */
12318 if (bypass_test || second_test)
12320 rtx test = second_test;
12322 rtx tmp2 = gen_reg_rtx (QImode);
12325 gcc_assert (!second_test);
12326 test = bypass_test;
12328 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12330 PUT_MODE (test, QImode);
12331 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12334 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12336 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12339 /* Attach a REG_EQUAL note describing the comparison result. */
12340 if (ix86_compare_op0 && ix86_compare_op1)
12342 equiv = simplify_gen_relational (code, QImode,
12343 GET_MODE (ix86_compare_op0),
12344 ix86_compare_op0, ix86_compare_op1);
12345 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12348 return 1; /* DONE */
12351 /* Expand comparison setting or clearing carry flag. Return true when
12352 successful and set pop for the operation. */
12354 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12356 enum machine_mode mode =
12357 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12359 /* Do not handle DImode compares that go through special path. */
12360 if (mode == (TARGET_64BIT ? TImode : DImode))
12363 if (SCALAR_FLOAT_MODE_P (mode))
12365 rtx second_test = NULL, bypass_test = NULL;
12366 rtx compare_op, compare_seq;
12368 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12370 /* Shortcut: following common codes never translate
12371 into carry flag compares. */
12372 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12373 || code == ORDERED || code == UNORDERED)
12376 /* These comparisons require zero flag; swap operands so they won't. */
12377 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12378 && !TARGET_IEEE_FP)
12383 code = swap_condition (code);
12386 /* Try to expand the comparison and verify that we end up with
12387 carry flag based comparison. This fails to be true only when
12388 we decide to expand comparison using arithmetic that is not
12389 too common scenario. */
12391 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12392 &second_test, &bypass_test);
12393 compare_seq = get_insns ();
/* Auxiliary tests mean more than one flag is consumed -- give up.  */
12396 if (second_test || bypass_test)
12399 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12400 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12401 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12403 code = GET_CODE (compare_op);
/* Only LTU/GEU read exactly the carry flag.  */
12405 if (code != LTU && code != GEU)
12408 emit_insn (compare_seq);
12413 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/op1 so the test becomes LTU or GEU.  */
12422 /* Convert a==0 into (unsigned)a<1. */
12425 if (op1 != const0_rtx)
12428 code = (code == EQ ? LTU : GEU);
12431 /* Convert a>b into b<a or a>=b-1. */
12434 if (CONST_INT_P (op1))
12436 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12437 /* Bail out on overflow. We still can swap operands but that
12438 would force loading of the constant into register. */
12439 if (op1 == const0_rtx
12440 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12442 code = (code == GTU ? GEU : LTU);
12449 code = (code == GTU ? LTU : GEU);
12453 /* Convert a>=0 into (unsigned)a<0x80000000. */
12456 if (mode == DImode || op1 != const0_rtx)
12458 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12459 code = (code == LT ? GEU : LTU);
12463 if (mode == DImode || op1 != constm1_rtx)
12465 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12466 code = (code == LE ? GEU : LTU);
12472 /* Swapping operands may cause constant to appear as first operand. */
12473 if (!nonimmediate_operand (op0, VOIDmode))
12475 if (!can_create_pseudo_p ())
12477 op0 = force_reg (mode, op0);
12479 ix86_compare_op0 = op0;
12480 ix86_compare_op1 = op1;
12481 *pop = ix86_expand_compare (code, NULL, NULL);
12482 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move (operands[0] = operands[1] ?
   operands[2] : operands[3]) using the compare previously recorded in
   ix86_compare_op0/ix86_compare_op1.  Returns 1 when the expansion is
   done, 0 when the caller must fall back to generic code.
   NOTE(review): this excerpt is elided — interior lines are missing,
   so the control structure shown here is incomplete.  */
12487 ix86_expand_int_movcc (rtx operands[])
12489 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12490 rtx compare_seq, compare_op;
12491 rtx second_test, bypass_test;
12492 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon below — harmless, but should be
   cleaned up.  */
12493 bool sign_bit_compare_p = false;;
12496 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12497 compare_seq = get_insns ();
12500 compare_code = GET_CODE (compare_op);
/* A compare against 0 (for GE/LT) or against -1 (for GT/LE) only tests
   the sign bit of op0, which opens up shift-based expansions below.  */
12502 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12503 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12504 sign_bit_compare_p = true;
12506 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12507 HImode insns, we'd be swallowed in word prefix ops. */
/* Both arms constant: try arithmetic (sbb/lea/setcc) sequences that
   avoid an actual conditional jump or cmov.  */
12509 if ((mode != HImode || TARGET_FAST_PREFIX)
12510 && (mode != (TARGET_64BIT ? TImode : DImode))
12511 && CONST_INT_P (operands[2])
12512 && CONST_INT_P (operands[3]))
12514 rtx out = operands[0];
12515 HOST_WIDE_INT ct = INTVAL (operands[2]);
12516 HOST_WIDE_INT cf = INTVAL (operands[3]);
12517 HOST_WIDE_INT diff;
12520 /* Sign bit compares are better done using shifts than we do by using
12522 if (sign_bit_compare_p
12523 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12524 ix86_compare_op1, &compare_op))
12526 /* Detect overlap between destination and compare sources. */
12529 if (!sign_bit_compare_p)
12531 bool fpcmp = false;
12533 compare_code = GET_CODE (compare_op);
12535 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12536 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12539 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12542 /* To simplify rest of code, restrict to the GEU case. */
12543 if (compare_code == LTU)
12545 HOST_WIDE_INT tmp = ct;
12548 compare_code = reverse_condition (compare_code);
12549 code = reverse_condition (code);
/* FP compares must use the unordered-aware reversal to avoid turning
   a non-trapping condition into a trapping one.  */
12554 PUT_CODE (compare_op,
12555 reverse_condition_maybe_unordered
12556 (GET_CODE (compare_op)));
12558 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12562 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12563 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12564 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg pattern).  */
12566 if (mode == DImode)
12567 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12569 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12573 if (code == GT || code == GE)
12574 code = reverse_condition (code);
12577 HOST_WIDE_INT tmp = ct;
12582 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12583 ix86_compare_op1, VOIDmode, 0, -1);
12596 tmp = expand_simple_binop (mode, PLUS,
12598 copy_rtx (tmp), 1, OPTAB_DIRECT);
12609 tmp = expand_simple_binop (mode, IOR,
12611 copy_rtx (tmp), 1, OPTAB_DIRECT);
12613 else if (diff == -1 && ct)
12623 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12625 tmp = expand_simple_binop (mode, PLUS,
12626 copy_rtx (tmp), GEN_INT (cf),
12627 copy_rtx (tmp), 1, OPTAB_DIRECT);
12635 * andl cf - ct, dest
12645 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12648 tmp = expand_simple_binop (mode, AND,
12650 gen_int_mode (cf - ct, mode),
12651 copy_rtx (tmp), 1, OPTAB_DIRECT);
12653 tmp = expand_simple_binop (mode, PLUS,
12654 copy_rtx (tmp), GEN_INT (ct),
12655 copy_rtx (tmp), 1, OPTAB_DIRECT);
12658 if (!rtx_equal_p (tmp, out))
12659 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12661 return 1; /* DONE */
12666 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
/* Swap the two constants (and reverse the condition) so later code can
   assume a canonical ordering.  */
12669 tmp = ct, ct = cf, cf = tmp;
12672 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12674 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12676 /* We may be reversing unordered compare to normal compare, that
12677 is not valid in general (we may convert non-trapping condition
12678 to trapping one), however on i386 we currently emit all
12679 comparisons unordered. */
12680 compare_code = reverse_condition_maybe_unordered (compare_code);
12681 code = reverse_condition_maybe_unordered (code);
12685 compare_code = reverse_condition (compare_code);
12686 code = reverse_condition (code);
12690 compare_code = UNKNOWN;
12691 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12692 && CONST_INT_P (ix86_compare_op1))
12694 if (ix86_compare_op1 == const0_rtx
12695 && (code == LT || code == GE))
12696 compare_code = code;
12697 else if (ix86_compare_op1 == constm1_rtx)
12701 else if (code == GT)
12706 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12707 if (compare_code != UNKNOWN
12708 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12709 && (cf == -1 || ct == -1))
12711 /* If lea code below could be used, only optimize
12712 if it results in a 2 insn sequence. */
12714 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12715 || diff == 3 || diff == 5 || diff == 9)
12716 || (compare_code == LT && ct == -1)
12717 || (compare_code == GE && cf == -1))
12720 * notl op1 (if necessary)
12728 code = reverse_condition (code);
12731 out = emit_store_flag (out, code, ix86_compare_op0,
12732 ix86_compare_op1, VOIDmode, 0, -1);
12734 out = expand_simple_binop (mode, IOR,
12736 out, 1, OPTAB_DIRECT);
12737 if (out != operands[0])
12738 emit_move_insn (operands[0], out);
12740 return 1; /* DONE */
/* diff is a small scale/index constant: a single setcc plus one lea
   computes cf + dest * (ct - cf).  */
12745 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12746 || diff == 3 || diff == 5 || diff == 9)
12747 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12749 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12755 * lea cf(dest*(ct-cf)),dest
12759 * This also catches the degenerate setcc-only case.
12765 out = emit_store_flag (out, code, ix86_compare_op0,
12766 ix86_compare_op1, VOIDmode, 0, 1);
12769 /* On x86_64 the lea instruction operates on Pmode, so we need
12770 to get arithmetics done in proper mode to match. */
12772 tmp = copy_rtx (out);
12776 out1 = copy_rtx (out);
12777 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12781 tmp = gen_rtx_PLUS (mode, tmp, out1);
12787 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12790 if (!rtx_equal_p (tmp, out))
12793 out = force_operand (tmp, copy_rtx (out));
12795 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12797 if (!rtx_equal_p (out, operands[0]))
12798 emit_move_insn (operands[0], copy_rtx (out));
12800 return 1; /* DONE */
12804 * General case: Jumpful:
12805 * xorl dest,dest cmpl op1, op2
12806 * cmpl op1, op2 movl ct, dest
12807 * setcc dest jcc 1f
12808 * decl dest movl cf, dest
12809 * andl (cf-ct),dest 1:
12812 * Size 20. Size 14.
12814 * This is reasonably steep, but branch mispredict costs are
12815 * high on modern cpus, so consider failing only if optimizing
12819 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12820 && BRANCH_COST >= 2)
12824 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12829 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12831 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12833 /* We may be reversing unordered compare to normal compare,
12834 that is not valid in general (we may convert non-trapping
12835 condition to trapping one), however on i386 we currently
12836 emit all comparisons unordered. */
12837 code = reverse_condition_maybe_unordered (code);
12841 code = reverse_condition (code);
12842 if (compare_code != UNKNOWN)
12843 compare_code = reverse_condition (compare_code);
12847 if (compare_code != UNKNOWN)
12849 /* notl op1 (if needed)
12854 For x < 0 (resp. x <= -1) there will be no notl,
12855 so if possible swap the constants to get rid of the
12857 True/false will be -1/0 while code below (store flag
12858 followed by decrement) is 0/-1, so the constants need
12859 to be exchanged once more. */
12861 if (compare_code == GE || !cf)
12863 code = reverse_condition (code);
12868 HOST_WIDE_INT tmp = cf;
12873 out = emit_store_flag (out, code, ix86_compare_op0,
12874 ix86_compare_op1, VOIDmode, 0, -1);
12878 out = emit_store_flag (out, code, ix86_compare_op0,
12879 ix86_compare_op1, VOIDmode, 0, 1);
/* setcc yields 0/1; decrement to 0/-1, mask with (cf - ct), then add
   ct — selects ct or cf without a branch.  */
12881 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12882 copy_rtx (out), 1, OPTAB_DIRECT);
12885 out = expand_simple_binop (mode, AND, copy_rtx (out),
12886 gen_int_mode (cf - ct, mode),
12887 copy_rtx (out), 1, OPTAB_DIRECT);
12889 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12890 copy_rtx (out), 1, OPTAB_DIRECT);
12891 if (!rtx_equal_p (out, operands[0]))
12892 emit_move_insn (operands[0], copy_rtx (out));
12894 return 1; /* DONE */
12898 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12900 /* Try a few things more with specific constants and a variable. */
12903 rtx var, orig_out, out, tmp;
12905 if (BRANCH_COST <= 2)
12906 return 0; /* FAIL */
12908 /* If one of the two operands is an interesting constant, load a
12909 constant with the above and mask it in with a logical operation. */
12911 if (CONST_INT_P (operands[2]))
12914 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12915 operands[3] = constm1_rtx, op = and_optab;
12916 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12917 operands[3] = const0_rtx, op = ior_optab;
12919 return 0; /* FAIL */
12921 else if (CONST_INT_P (operands[3]))
12924 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12925 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): suspected typo below — by symmetry with the
   CONST_INT_P (operands[2]) branch above, this should test
   operands[2] != const0_rtx.  As written, operands[3] holds -1 here so
   the test is trivially true.  Verify against upstream GCC.  */
12926 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12927 operands[2] = const0_rtx, op = ior_optab;
12929 return 0; /* FAIL */
12932 return 0; /* FAIL */
12934 orig_out = operands[0];
12935 tmp = gen_reg_rtx (mode);
12938 /* Recurse to get the constant loaded. */
12939 if (ix86_expand_int_movcc (operands) == 0)
12940 return 0; /* FAIL */
12942 /* Mask in the interesting variable. */
12943 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12945 if (!rtx_equal_p (out, orig_out))
12946 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12948 return 1; /* DONE */
12952 * For comparison with above,
/* Generic cmov path: force operands into registers as needed, then emit
   one IF_THEN_ELSE set (plus extra sets for second/bypass tests).  */
12962 if (! nonimmediate_operand (operands[2], mode))
12963 operands[2] = force_reg (mode, operands[2]);
12964 if (! nonimmediate_operand (operands[3], mode))
12965 operands[3] = force_reg (mode, operands[3]);
12967 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12969 rtx tmp = gen_reg_rtx (mode);
12970 emit_move_insn (tmp, operands[3]);
12973 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12975 rtx tmp = gen_reg_rtx (mode);
12976 emit_move_insn (tmp, operands[2]);
12980 if (! register_operand (operands[2], VOIDmode)
12982 || ! register_operand (operands[3], VOIDmode)))
12983 operands[2] = force_reg (mode, operands[2]);
12986 && ! register_operand (operands[3], VOIDmode))
12987 operands[3] = force_reg (mode, operands[3]);
12989 emit_insn (compare_seq);
12990 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12991 gen_rtx_IF_THEN_ELSE (mode,
12992 compare_op, operands[2],
12995 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12996 gen_rtx_IF_THEN_ELSE (mode,
12998 copy_rtx (operands[3]),
12999 copy_rtx (operands[0]))));
13001 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13002 gen_rtx_IF_THEN_ELSE (mode,
13004 copy_rtx (operands[2]),
13005 copy_rtx (operands[0]))));
13007 return 1; /* DONE */
13010 /* Swap, force into registers, or otherwise massage the two operands
13011 to an sse comparison with a mask result. Thus we differ a bit from
13012 ix86_prepare_fp_compare_args which expects to produce a flags result.
13014 The DEST operand exists to help determine whether to commute commutative
13015 operators. The POP0/POP1 operands are updated in place. The new
13016 comparison code is returned, or UNKNOWN if not implementable. */
/* Massage CODE/*POP0/*POP1 into a comparison the SSE compare insns can
   express directly, swapping operands when needed.  DEST (may be NULL)
   is only used to pick the operand order for commutative codes.
   Returns the possibly-adjusted code, or UNKNOWN when the comparison
   (e.g. LTGT) cannot be implemented.  */
13018 static enum rtx_code
13019 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13020 rtx *pop0, rtx *pop1)
13028 /* We have no LTGT as an operator. We could implement it with
13029 NE & ORDERED, but this requires an extra temporary. It's
13030 not clear that it's worth it. */
13037 /* These are supported directly. */
13044 /* For commutative operators, try to canonicalize the destination
13045 operand to be first in the comparison - this helps reload to
13046 avoid extra moves. */
13047 if (!dest || !rtx_equal_p (dest, *pop1))
13055 /* These are not supported directly. Swap the comparison operands
13056 to transform into something that is supported. */
13060 code = swap_condition (code);
13064 gcc_unreachable ();
13070 /* Detect conditional moves that exactly match min/max operational
13071 semantics. Note that this is IEEE safe, as long as we don't
13072 interchange the operands.
13074 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13075 and TRUE if the operation is successful and instructions are emitted. */
13078 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13079 rtx cmp_op1, rtx if_true, rtx if_false)
13081 enum machine_mode mode;
/* Normalize UNGE by swapping the arms so the remaining logic only has
   to recognize one orientation.  */
13087 else if (code == UNGE)
13090 if_true = if_false;
/* The cmov matches min/max only when its arms are exactly the compare
   operands (in either order).  */
13096 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13098 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13103 mode = GET_MODE (dest);
13105 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13106 but MODE may be a vector mode and thus not appropriate. */
13107 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-correct path: wrap the operands in an UNSPEC so the operand
   order (and hence NaN/signed-zero behavior of minss/maxss) is fixed.  */
13109 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13112 if_true = force_reg (mode, if_true);
13113 v = gen_rtvec (2, if_true, if_false);
13114 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX is fine.  */
13118 code = is_min ? SMIN : SMAX;
13119 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13122 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13126 /* Expand an sse vector comparison. Return the register with the result. */
13129 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13130 rtx op_true, rtx op_false)
13132 enum machine_mode mode = GET_MODE (dest);
/* SSE compares require op0 in a register; op1 may be reg or mem.  */
13135 cmp_op0 = force_reg (mode, cmp_op0);
13136 if (!nonimmediate_operand (cmp_op1, mode))
13137 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST would clobber an input still needed
   by the caller's subsequent select.  */
13140 || reg_overlap_mentioned_p (dest, op_true)
13141 || reg_overlap_mentioned_p (dest, op_false))
13142 dest = gen_reg_rtx (mode);
13144 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13145 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13150 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13151 operations. This is used for both scalar and vector conditional moves. */
13154 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13156 enum machine_mode mode = GET_MODE (dest);
/* Single-insn conditional move form (IF_THEN_ELSE on the mask).  */
13161 rtx pcmov = gen_rtx_SET (mode, dest,
13162 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* op_false == 0: result is simply mask & op_true.  */
13167 else if (op_false == CONST0_RTX (mode))
13169 op_true = force_reg (mode, op_true);
13170 x = gen_rtx_AND (mode, cmp, op_true);
13171 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* op_true == 0: result is ~mask & op_false.  */
13173 else if (op_true == CONST0_RTX (mode))
13175 op_false = force_reg (mode, op_false);
13176 x = gen_rtx_NOT (mode, cmp);
13177 x = gen_rtx_AND (mode, x, op_false);
13178 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* General case: (op_true & mask) | (op_false & ~mask).  */
13182 op_true = force_reg (mode, op_true);
13183 op_false = force_reg (mode, op_false);
13185 t2 = gen_reg_rtx (mode);
13187 t3 = gen_reg_rtx (mode);
13191 x = gen_rtx_AND (mode, op_true, cmp);
13192 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13194 x = gen_rtx_NOT (mode, cmp);
13195 x = gen_rtx_AND (mode, x, op_false);
13196 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13198 x = gen_rtx_IOR (mode, t3, t2);
13199 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13203 /* Expand a floating-point conditional move. Return true if successful. */
13206 ix86_expand_fp_movcc (rtx operands[])
13208 enum machine_mode mode = GET_MODE (operands[0]);
13209 enum rtx_code code = GET_CODE (operands[1]);
13210 rtx tmp, compare_op, second_test, bypass_test;
/* SSE scalar FP: implement the cmov as a masked select, since SSE has
   compare-into-mask but no cmov on xmm registers.  */
13212 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13214 enum machine_mode cmode;
13216 /* Since we've no cmove for sse registers, don't force bad register
13217 allocation just to gain access to it. Deny movcc when the
13218 comparison mode doesn't match the move mode. */
13219 cmode = GET_MODE (ix86_compare_op0);
13220 if (cmode == VOIDmode)
13221 cmode = GET_MODE (ix86_compare_op1);
13225 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13227 &ix86_compare_op1);
13228 if (code == UNKNOWN)
/* Prefer a direct min/max when the cmov matches that pattern.  */
13231 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13232 ix86_compare_op1, operands[2],
13236 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13237 ix86_compare_op1, operands[2], operands[3]);
13238 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13242 /* The floating point conditional move instructions don't directly
13243 support conditions resulting from a signed integer comparison. */
13245 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13247 /* The floating point conditional move instructions don't directly
13248 support signed integer comparisons. */
/* Rewrite an unsupported condition as (setcc != 0) so fcmov can use it.  */
13250 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13252 gcc_assert (!second_test && !bypass_test);
13253 tmp = gen_reg_rtx (QImode);
13254 ix86_expand_setcc (code, tmp);
13256 ix86_compare_op0 = tmp;
13257 ix86_compare_op1 = const0_rtx;
13258 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy an arm into a scratch when it overlaps the destination and an
   extra test will still read it.  */
13260 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13262 tmp = gen_reg_rtx (mode);
13263 emit_move_insn (tmp, operands[3]);
13266 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13268 tmp = gen_reg_rtx (mode);
13269 emit_move_insn (tmp, operands[2]);
13273 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13274 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13275 operands[2], operands[3])));
13277 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13278 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13279 operands[3], operands[0])));
13281 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13282 gen_rtx_IF_THEN_ELSE (mode, second_test,
13283 operands[2], operands[0])));
13288 /* Expand a floating-point vector conditional move; a vcond operation
13289 rather than a movcc operation. */
13292 ix86_expand_fp_vcond (rtx operands[])
13294 enum rtx_code code = GET_CODE (operands[3]);
/* Canonicalize the comparison for SSE; bail out when unimplementable.  */
13297 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13298 &operands[4], &operands[5]);
13299 if (code == UNKNOWN)
/* A vcond matching min/max needs no mask/select sequence.  */
13302 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13303 operands[5], operands[1], operands[2]))
13306 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13307 operands[1], operands[2]);
13308 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13312 /* Expand a signed/unsigned integral vector conditional move. */
13315 ix86_expand_int_vcond (rtx operands[])
13317 enum machine_mode mode = GET_MODE (operands[0]);
13318 enum rtx_code code = GET_CODE (operands[3]);
13319 bool negate = false;
13322 cop0 = operands[4];
13323 cop1 = operands[5];
13325 /* Canonicalize the comparison to EQ, GT, GTU. */
13336 code = reverse_condition (code);
13342 code = reverse_condition (code);
13348 code = swap_condition (code);
13349 x = cop0, cop0 = cop1, cop1 = x;
13353 gcc_unreachable ();
13356 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13357 if (mode == V2DImode)
13362 /* SSE4.1 supports EQ. */
13363 if (!TARGET_SSE4_1)
13369 /* SSE4.2 supports GT/GTU. */
13370 if (!TARGET_SSE4_2)
13375 gcc_unreachable ();
13379 /* Unsigned parallel compare is not supported by the hardware. Play some
13380 tricks to turn this into a signed comparison against 0. */
13383 cop0 = force_reg (mode, cop0);
13392 /* Perform a parallel modulo subtraction. */
13393 t1 = gen_reg_rtx (mode);
13394 emit_insn ((mode == V4SImode
13396 : gen_subv2di3) (t1, cop0, cop1));
13398 /* Extract the original sign bit of op0. */
13399 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13401 t2 = gen_reg_rtx (mode);
13402 emit_insn ((mode == V4SImode
13404 : gen_andv2di3) (t2, cop0, mask));
13406 /* XOR it back into the result of the subtraction. This results
13407 in the sign bit set iff we saw unsigned underflow. */
13408 x = gen_reg_rtx (mode);
13409 emit_insn ((mode == V4SImode
13411 : gen_xorv2di3) (x, t1, t2));
13419 /* Perform a parallel unsigned saturating subtraction. */
13420 x = gen_reg_rtx (mode);
13421 emit_insn (gen_rtx_SET (VOIDmode, x,
13422 gen_rtx_US_MINUS (mode, cop0, cop1)));
13429 gcc_unreachable ();
13433 cop1 = CONST0_RTX (mode);
/* NEGATE swaps the select arms via the 1+negate / 2-negate indexing
   below, compensating for a reversed condition chosen earlier.  */
13436 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13437 operands[1+negate], operands[2-negate]);
13439 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13440 operands[2-negate]);
13444 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13445 true if we should do zero extension, else sign extension. HIGH_P is
13446 true if we want the N/2 high elements, else the low elements. */
13449 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13451 enum machine_mode imode = GET_MODE (operands[1]);
13452 rtx (*unpack)(rtx, rtx, rtx);
/* Choose the interleave insn for the source element width and half.  */
13459 unpack = gen_vec_interleave_highv16qi;
13461 unpack = gen_vec_interleave_lowv16qi;
13465 unpack = gen_vec_interleave_highv8hi;
13467 unpack = gen_vec_interleave_lowv8hi;
13471 unpack = gen_vec_interleave_highv4si;
13473 unpack = gen_vec_interleave_lowv4si;
13476 gcc_unreachable ();
13479 dest = gen_lowpart (imode, operands[0]);
/* Second interleave operand: zeros for zero-extension, or a lanewise
   0/-1 sign mask (0 > x) for sign-extension.  */
13482 se = force_reg (imode, CONST0_RTX (imode));
13484 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13485 operands[1], pc_rtx, pc_rtx);
13487 emit_insn (unpack (dest, operands[1], se));
13490 /* This function performs the same task as ix86_expand_sse_unpack,
13491 but with SSE4.1 instructions. */
13494 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13496 enum machine_mode imode = GET_MODE (operands[1]);
13497 rtx (*unpack)(rtx, rtx);
/* Pick the pmovzx/pmovsx variant for the source element width.  */
13504 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13506 unpack = gen_sse4_1_extendv8qiv8hi2;
13510 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13512 unpack = gen_sse4_1_extendv4hiv4si2;
13516 unpack = gen_sse4_1_zero_extendv2siv2di2;
13518 unpack = gen_sse4_1_extendv2siv2di2;
13521 gcc_unreachable ();
13524 dest = operands[0];
13527 /* Shift higher 8 bytes to lower 8 bytes. */
13528 src = gen_reg_rtx (imode);
13529 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13530 gen_lowpart (TImode, operands[1]),
13536 emit_insn (unpack (dest, src));
13539 /* This function performs the same task as ix86_expand_sse_unpack,
13540 but with amdfam15 instructions. */
13542 #define PPERM_SRC 0x00 /* copy source */
13543 #define PPERM_INVERT 0x20 /* invert source */
13544 #define PPERM_REVERSE 0x40 /* bit reverse source */
13545 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13546 #define PPERM_ZERO 0x80 /* all 0's */
13547 #define PPERM_ONES 0xa0 /* all 1's */
13548 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13549 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13551 #define PPERM_SRC1 0x00 /* use first source byte */
13552 #define PPERM_SRC2 0x10 /* use second source byte */
/* SSE5 (pperm) variant of ix86_expand_sse_unpack: widen the elements of
   operands[1] into operands[0] by building a 16-byte pperm selector
   (v) that copies source bytes and fills the widened bytes with zeros
   (UNSIGNED_P) or the propagated sign bit.  HIGH_P selects the upper
   half of the source.  */
13555 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13557 enum machine_mode imode = GET_MODE (operands[1]);
13558 int pperm_bytes[16];
/* H biases the source byte index to the upper half when HIGH_P.  */
13560 int h = (high_p) ? 8 : 0;
13563 rtvec v = rtvec_alloc (16);
13566 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each source byte expands to byte + zero/sign byte.  */
13571 vs = rtvec_alloc (8);
13572 h2 = (high_p) ? 8 : 0;
13573 for (i = 0; i < 8; i++)
13575 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13576 pperm_bytes[2*i+1] = ((unsigned_p)
13578 : PPERM_SIGN | PPERM_SRC2 | i | h);
13581 for (i = 0; i < 16; i++)
13582 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13584 for (i = 0; i < 8; i++)
13585 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13587 p = gen_rtx_PARALLEL (VOIDmode, vs);
13588 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13590 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13592 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes plus two extension bytes per lane.  */
13596 vs = rtvec_alloc (4);
13597 h2 = (high_p) ? 4 : 0;
13598 for (i = 0; i < 4; i++)
13600 sign_extend = ((unsigned_p)
13602 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13603 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13604 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13605 pperm_bytes[4*i+2] = sign_extend;
13606 pperm_bytes[4*i+3] = sign_extend;
13609 for (i = 0; i < 16; i++)
13610 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13612 for (i = 0; i < 4; i++)
13613 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13615 p = gen_rtx_PARALLEL (VOIDmode, vs);
13616 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13618 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13620 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes plus four extension bytes per lane.  */
13624 vs = rtvec_alloc (2);
13625 h2 = (high_p) ? 2 : 0;
13626 for (i = 0; i < 2; i++)
13628 sign_extend = ((unsigned_p)
13630 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13631 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13632 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13633 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13634 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13635 pperm_bytes[8*i+4] = sign_extend;
13636 pperm_bytes[8*i+5] = sign_extend;
13637 pperm_bytes[8*i+6] = sign_extend;
13638 pperm_bytes[8*i+7] = sign_extend;
13641 for (i = 0; i < 16; i++)
13642 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13644 for (i = 0; i < 2; i++)
13645 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13647 p = gen_rtx_PARALLEL (VOIDmode, vs);
13648 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13650 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13652 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13656 gcc_unreachable ();
13662 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13663 next narrower integer vector type */
13665 ix86_expand_sse5_pack (rtx operands[3])
13667 enum machine_mode imode = GET_MODE (operands[0]);
13668 int pperm_bytes[16];
13670 rtvec v = rtvec_alloc (16);
13672 rtx op0 = operands[0];
13673 rtx op1 = operands[1];
13674 rtx op2 = operands[2];
/* V16QI result: take the low byte of each HI element — op1 supplies
   the low 8 result bytes, op2 the high 8.  */
13679 for (i = 0; i < 8; i++)
13681 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13682 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13685 for (i = 0; i < 16; i++)
13686 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13688 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13689 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V8HI result: take the low two bytes of each SI element.  */
13693 for (i = 0; i < 4; i++)
13695 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13696 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13697 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13698 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13701 for (i = 0; i < 16; i++)
13702 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13704 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13705 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V4SI result: take the low four bytes of each DI element.  */
13709 for (i = 0; i < 2; i++)
13711 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13712 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13713 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13714 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13715 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13716 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13717 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13718 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13721 for (i = 0; i < 16; i++)
13722 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13724 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13725 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13729 gcc_unreachable ();
13735 /* Expand conditional increment or decrement using adb/sbb instructions.
13736 The default case using setcc followed by the conditional move can be
13737 done by generic code. */
13739 ix86_expand_int_addcc (rtx operands[])
13741 enum rtx_code code = GET_CODE (operands[1]);
13743 rtx val = const0_rtx;
13744 bool fpcmp = false;
13745 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1/-1 increments can ride on the carry flag via adc/sbb.  */
13747 if (operands[3] != const1_rtx
13748 && operands[3] != constm1_rtx)
13750 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13751 ix86_compare_op1, &compare_op))
13753 code = GET_CODE (compare_op);
13755 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13756 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13759 code = ix86_fp_compare_code_to_integer (code);
/* FP conditions need the unordered-aware reversal; see comment in
   ix86_expand_int_movcc.  */
13766 PUT_CODE (compare_op,
13767 reverse_condition_maybe_unordered
13768 (GET_CODE (compare_op)));
13770 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13772 PUT_MODE (compare_op, mode);
13774 /* Construct either adc or sbb insn. */
13775 if ((code == LTU) == (operands[3] == constm1_rtx))
13777 switch (GET_MODE (operands[0]))
13780 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13783 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13786 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13789 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13792 gcc_unreachable ();
13797 switch (GET_MODE (operands[0]))
13800 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13803 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13806 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13809 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13812 gcc_unreachable ();
13815 return 1; /* DONE */
13819 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13820 works for floating pointer parameters and nonoffsetable memories.
13821 For pushes, it returns just stack offsets; the values will be saved
13822 in the right order. Maximally three parts are generated. */
13825 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Number of word-sized pieces: 32-bit targets split into SImode words
   (3 for XFmode), 64-bit targets into DImode words.  */
13830 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13832 size = (GET_MODE_SIZE (mode) + 4) / 8;
13834 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13835 gcc_assert (size >= 2 && size <= 3);
13837 /* Optimize constant pool reference to immediates. This is used by fp
13838 moves, that force all constants to memory to allow combining. */
13839 if (MEM_P (operand) && MEM_READONLY_P (operand))
13841 rtx tmp = maybe_get_pool_constant (operand);
13846 if (MEM_P (operand) && !offsettable_memref_p (operand))
13848 /* The only non-offsetable memories we handle are pushes. */
13849 int ok = push_operand (operand, VOIDmode);
/* For pushes, return the same (Pmode) stack ref for each part; the
   caller emits them in the right order.  */
13853 operand = copy_rtx (operand);
13854 PUT_MODE (operand, Pmode);
13855 parts[0] = parts[1] = parts[2] = operand;
13859 if (GET_CODE (operand) == CONST_VECTOR)
13861 enum machine_mode imode = int_mode_for_mode (mode);
13862 /* Caution: if we looked through a constant pool memory above,
13863 the operand may actually have a different mode now. That's
13864 ok, since we want to pun this all the way back to an integer. */
13865 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13866 gcc_assert (operand != NULL);
/* 32-bit target: split into SImode parts.  */
13872 if (mode == DImode)
13873 split_di (&operand, 1, &parts[0], &parts[1]);
13876 if (REG_P (operand))
13878 gcc_assert (reload_completed);
13879 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13880 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13882 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13884 else if (offsettable_memref_p (operand))
13886 operand = adjust_address (operand, SImode, 0);
13887 parts[0] = operand;
13888 parts[1] = adjust_address (operand, SImode, 4);
13890 parts[2] = adjust_address (operand, SImode, 8);
13892 else if (GET_CODE (operand) == CONST_DOUBLE)
13897 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13901 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13902 parts[2] = gen_int_mode (l[2], SImode);
13905 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13908 gcc_unreachable ();
13910 parts[1] = gen_int_mode (l[1], SImode);
13911 parts[0] = gen_int_mode (l[0], SImode);
13914 gcc_unreachable ();
/* 64-bit target: split into DImode parts (plus an upper SImode part
   for XFmode).  */
13919 if (mode == TImode)
13920 split_ti (&operand, 1, &parts[0], &parts[1]);
13921 if (mode == XFmode || mode == TFmode)
13923 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13924 if (REG_P (operand))
13926 gcc_assert (reload_completed);
13927 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13928 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13930 else if (offsettable_memref_p (operand))
13932 operand = adjust_address (operand, DImode, 0);
13933 parts[0] = operand;
13934 parts[1] = adjust_address (operand, upper_mode, 8);
13936 else if (GET_CODE (operand) == CONST_DOUBLE)
13941 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13942 real_to_target (l, &r, mode);
13944 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13945 if (HOST_BITS_PER_WIDE_INT >= 64)
13948 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13949 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13952 parts[0] = immed_double_const (l[0], l[1], DImode);
13954 if (upper_mode == SImode)
13955 parts[1] = gen_int_mode (l[2], SImode);
13956 else if (HOST_BITS_PER_WIDE_INT >= 64)
13959 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13960 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13963 parts[1] = immed_double_const (l[2], l[3], DImode);
13966 gcc_unreachable ();
13973 /* Emit insns to perform a move or push of DI, DF, and XF values.
13974 Return false when normal moves are needed; true when all required
13975 insns have been emitted. Operands 2-4 contain the input values
13976 in the correct order; operands 5-7 contain the output values. */
13979 ix86_split_long_move (rtx operands[])
13984 int collisions = 0;
13985 enum machine_mode mode = GET_MODE (operands[0]);
13987 /* The DFmode expanders may ask us to move double.
13988 For 64bit target this is single move. By hiding the fact
13989 here we simplify i386.md splitters. */
13990 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13992 /* Optimize constant pool reference to immediates. This is used by
13993 fp moves, that force all constants to memory to allow combining. */
13995 if (MEM_P (operands[1])
13996 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13997 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13998 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13999 if (push_operand (operands[0], VOIDmode))
14001 operands[0] = copy_rtx (operands[0]);
14002 PUT_MODE (operands[0], Pmode);
14005 operands[0] = gen_lowpart (DImode, operands[0]);
14006 operands[1] = gen_lowpart (DImode, operands[1]);
14007 emit_move_insn (operands[0], operands[1]);
14011 /* The only non-offsettable memory we handle is push. */
14012 if (push_operand (operands[0], VOIDmode))
14015 gcc_assert (!MEM_P (operands[0])
14016 || offsettable_memref_p (operands[0]));
/* Split both operands into word-sized parts; part[1][] is the source,
   part[0][] the destination.  */
14018 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14019 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14021 /* When emitting push, take care for source operands on the stack. */
14022 if (push && MEM_P (operands[1])
14023 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14026 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
14027 XEXP (part[1][2], 0));
14028 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
14029 XEXP (part[1][1], 0));
14032 /* We need to do copy in the right order in case an address register
14033 of the source overlaps the destination. */
14034 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14036 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14038 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14041 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14044 /* Collision in the middle part can be handled by reordering. */
14045 if (collisions == 1 && nparts == 3
14046 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14049 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14050 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14053 /* If there are more collisions, we can't handle it by reordering.
14054 Do an lea to the last part and use only one colliding move. */
14055 else if (collisions > 1)
14061 base = part[0][nparts - 1];
14063 /* Handle the case when the last part isn't valid for lea.
14064 Happens in 64-bit mode storing the 12-byte XFmode. */
14065 if (GET_MODE (base) != Pmode)
14066 base = gen_rtx_REG (Pmode, REGNO (base));
14068 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14069 part[1][0] = replace_equiv_address (part[1][0], base);
14070 part[1][1] = replace_equiv_address (part[1][1],
14071 plus_constant (base, UNITS_PER_WORD));
14073 part[1][2] = replace_equiv_address (part[1][2],
14074 plus_constant (base, 8));
/* 128-bit-slot XFmode push: pre-adjust the stack pointer by the 4 pad
   bytes before pushing the top part.  */
14084 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14085 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)))
14086 emit_move_insn (part[0][2], part[1][2]);
14091 /* In 64bit mode we don't have 32bit push available. In case this is
14092 register, it is OK - we will just use larger counterpart. We also
14093 retype memory - these comes from attempt to avoid REX prefix on
14094 moving of second half of TFmode value. */
14095 if (GET_MODE (part[1][1]) == SImode)
14097 switch (GET_CODE (part[1][1]))
14100 part[1][1] = adjust_address (part[1][1], DImode, 0);
14104 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14108 gcc_unreachable ();
14111 if (GET_MODE (part[1][0]) == SImode)
14112 part[1][0] = part[1][1];
14115 emit_move_insn (part[0][1], part[1][1]);
14116 emit_move_insn (part[0][0], part[1][0]);
14120 /* Choose correct order to not overwrite the source before it is copied. */
14121 if ((REG_P (part[0][0])
14122 && REG_P (part[1][1])
14123 && (REGNO (part[0][0]) == REGNO (part[1][1])
14125 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14127 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Colliding case: emit the parts highest-first (operands 2-4 are dests,
   5-7 the matching sources).  */
14131 operands[2] = part[0][2];
14132 operands[3] = part[0][1];
14133 operands[4] = part[0][0];
14134 operands[5] = part[1][2];
14135 operands[6] = part[1][1];
14136 operands[7] = part[1][0];
14140 operands[2] = part[0][1];
14141 operands[3] = part[0][0];
14142 operands[5] = part[1][1];
14143 operands[6] = part[1][0];
/* Non-colliding case: lowest part first.  */
14150 operands[2] = part[0][0];
14151 operands[3] = part[0][1];
14152 operands[4] = part[0][2];
14153 operands[5] = part[1][0];
14154 operands[6] = part[1][1];
14155 operands[7] = part[1][2];
14159 operands[2] = part[0][0];
14160 operands[3] = part[0][1];
14161 operands[5] = part[1][0];
14162 operands[6] = part[1][1];
14166 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14169 if (CONST_INT_P (operands[5])
14170 && operands[5] != const0_rtx
14171 && REG_P (operands[2]))
14173 if (CONST_INT_P (operands[6])
14174 && INTVAL (operands[6]) == INTVAL (operands[5]))
14175 operands[6] = operands[2];
14178 && CONST_INT_P (operands[7])
14179 && INTVAL (operands[7]) == INTVAL (operands[5]))
14180 operands[7] = operands[2];
14184 && CONST_INT_P (operands[6])
14185 && operands[6] != const0_rtx
14186 && REG_P (operands[3])
14187 && CONST_INT_P (operands[7])
14188 && INTVAL (operands[7]) == INTVAL (operands[6]))
14189 operands[7] = operands[3];
14192 emit_move_insn (operands[2], operands[5]);
14193 emit_move_insn (operands[3], operands[6]);
14195 emit_move_insn (operands[4], operands[7]);
14200 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14201 left shift by a constant, either using a single shift or
14202 a sequence of add instructions. */
14205 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14209 emit_insn ((mode == DImode
14211 : gen_adddi3) (operand, operand, operand));
/* Expand x << COUNT as COUNT self-additions (x += x doubles x) when,
   per the per-CPU cost tables, the add chain is no more expensive than a
   single constant shift -- but never when optimizing for size.  */
14213 else if (!optimize_size
14214 && count * ix86_cost->add <= ix86_cost->shift_const)
14217 for (i=0; i<count; i++)
14219 emit_insn ((mode == DImode
14221 : gen_adddi3) (operand, operand, operand));
/* Otherwise fall back to a single shift instruction.  */
14225 emit_insn ((mode == DImode
14227 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit targets, TImode on
   64-bit targets) into word-sized operations.  SCRATCH, if non-NULL, is a
   spare register usable for the variable-count adjustment sequence.  */
14231 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14233 rtx low[2], high[2];
14235 const int single_width = mode == DImode ? 32 : 64;
14237 if (CONST_INT_P (operands[2]))
14239 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14240 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shifts of a whole word or more: low word becomes zero, high word is
   the old low word shifted by the remainder.  */
14242 if (count >= single_width)
14244 emit_move_insn (high[0], low[1]);
14245 emit_move_insn (low[0], const0_rtx);
14247 if (count > single_width)
14248 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Sub-word constant shift: shld moves bits from low into high, then the
   low word is shifted on its own.  */
14252 if (!rtx_equal_p (operands[0], operands[1]))
14253 emit_move_insn (operands[0], operands[1]);
14254 emit_insn ((mode == DImode
14256 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14257 ix86_expand_ashl_const (low[0], count, mode);
14262 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14264 if (operands[1] == const1_rtx)
14266 /* Assuming we've chosen a QImode capable registers, then 1 << N
14267 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14268 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14270 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14272 ix86_expand_clear (low[0]);
14273 ix86_expand_clear (high[0]);
14274 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14276 d = gen_lowpart (QImode, low[0]);
14277 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14278 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14279 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14281 d = gen_lowpart (QImode, high[0]);
14282 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14283 s = gen_rtx_NE (QImode, flags, const0_rtx);
14284 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14287 /* Otherwise, we can get the same results by manually performing
14288 a bit extract operation on bit 5/6, and then performing the two
14289 shifts. The two methods of getting 0/1 into low/high are exactly
14290 the same size. Avoiding the shift in the bit extract case helps
14291 pentium4 a bit; no one else seems to care much either way. */
14296 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14297 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14299 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14300 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14302 emit_insn ((mode == DImode
14304 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14305 emit_insn ((mode == DImode
14307 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14308 emit_move_insn (low[0], high[0]);
14309 emit_insn ((mode == DImode
14311 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14314 emit_insn ((mode == DImode
14316 : gen_ashldi3) (low[0], low[0], operands[2]));
14317 emit_insn ((mode == DImode
14319 : gen_ashldi3) (high[0], high[0], operands[2]));
14323 if (operands[1] == constm1_rtx)
14325 /* For -1 << N, we can avoid the shld instruction, because we
14326 know that we're shifting 0...31/63 ones into a -1. */
14327 emit_move_insn (low[0], constm1_rtx)
14329 emit_move_insn (high[0], low[0]);
14331 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld plus a per-word shift, then fix up
   for counts >= single_width (cmove when available, branch otherwise).  */
14335 if (!rtx_equal_p (operands[0], operands[1]))
14336 emit_move_insn (operands[0], operands[1]);
14338 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14339 emit_insn ((mode == DImode
14341 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14344 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14346 if (TARGET_CMOVE && scratch)
14348 ix86_expand_clear (scratch);
14349 emit_insn ((mode == DImode
14350 ? gen_x86_shift_adj_1
14351 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14354 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift (DImode on 32-bit targets,
   TImode on 64-bit targets) into word-sized operations.  SCRATCH, if
   non-NULL, is a spare register for the variable-count adjustment.  */
14358 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14360 rtx low[2], high[2];
14362 const int single_width = mode == DImode ? 32 : 64;
14364 if (CONST_INT_P (operands[2]))
14366 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14367 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximal shift: both words become the sign replicated everywhere.  */
14369 if (count == single_width * 2 - 1)
14371 emit_move_insn (high[0], high[1]);
14372 emit_insn ((mode == DImode
14374 : gen_ashrdi3) (high[0], high[0],
14375 GEN_INT (single_width - 1)));
14376 emit_move_insn (low[0], high[0]);
/* Whole-word shifts: low gets old high shifted; high is sign-fill.  */
14379 else if (count >= single_width)
14381 emit_move_insn (low[0], high[1]);
14382 emit_move_insn (high[0], low[0]);
14383 emit_insn ((mode == DImode
14385 : gen_ashrdi3) (high[0], high[0],
14386 GEN_INT (single_width - 1)));
14387 if (count > single_width)
14388 emit_insn ((mode == DImode
14390 : gen_ashrdi3) (low[0], low[0],
14391 GEN_INT (count - single_width)));
/* Sub-word constant shift: shrd into the low word, arithmetic shift of
   the high word.  */
14395 if (!rtx_equal_p (operands[0], operands[1]))
14396 emit_move_insn (operands[0], operands[1]);
14397 emit_insn ((mode == DImode
14399 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14400 emit_insn ((mode == DImode
14402 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + per-word shift, then adjust for counts past a
   word boundary (cmove path needs the scratch holding the sign word).  */
14407 if (!rtx_equal_p (operands[0], operands[1]))
14408 emit_move_insn (operands[0], operands[1]);
14410 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14412 emit_insn ((mode == DImode
14414 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14415 emit_insn ((mode == DImode
14417 : gen_ashrdi3) (high[0], high[0], operands[2]));
14419 if (TARGET_CMOVE && scratch)
14421 emit_move_insn (scratch, high[0]);
14422 emit_insn ((mode == DImode
14424 : gen_ashrdi3) (scratch, scratch,
14425 GEN_INT (single_width - 1)));
14426 emit_insn ((mode == DImode
14427 ? gen_x86_shift_adj_1
14428 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14432 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift (DImode on 32-bit targets,
   TImode on 64-bit targets) into word-sized operations.  SCRATCH, if
   non-NULL, is a spare register for the variable-count adjustment.  */
14437 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14439 rtx low[2], high[2];
14441 const int single_width = mode == DImode ? 32 : 64;
14443 if (CONST_INT_P (operands[2]))
14445 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14446 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Whole-word shifts: low gets old high shifted; high becomes zero.  */
14448 if (count >= single_width)
14450 emit_move_insn (low[0], high[1]);
14451 ix86_expand_clear (high[0]);
14453 if (count > single_width)
14454 emit_insn ((mode == DImode
14456 : gen_lshrdi3) (low[0], low[0],
14457 GEN_INT (count - single_width)));
/* Sub-word constant shift: shrd into the low word, logical shift of the
   high word.  */
14461 if (!rtx_equal_p (operands[0], operands[1]))
14462 emit_move_insn (operands[0], operands[1]);
14463 emit_insn ((mode == DImode
14465 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14466 emit_insn ((mode == DImode
14468 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + per-word shift, then adjust for counts past a
   word boundary.  */
14473 if (!rtx_equal_p (operands[0], operands[1]))
14474 emit_move_insn (operands[0], operands[1]);
14476 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14478 emit_insn ((mode == DImode
14480 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14481 emit_insn ((mode == DImode
14483 : gen_lshrdi3) (high[0], high[0], operands[2]));
14485 /* Heh. By reversing the arguments, we can reuse this pattern. */
14486 if (TARGET_CMOVE && scratch)
14488 ix86_expand_clear (scratch);
14489 emit_insn ((mode == DImode
14490 ? gen_x86_shift_adj_1
14491 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14495 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14499 /* Predict just emitted jump instruction to be taken with probability PROB.
14500 PROB is expressed on the REG_BR_PROB_BASE scale; the prediction is
14501 recorded by attaching a REG_BR_PROB note to the last emitted insn,
14502 which is asserted to be a jump. */
14501 predict_jump (int prob)
14503 rtx insn = get_last_insn ();
14504 gcc_assert (JUMP_P (insn));
14506 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14511 /* Helper function for the string operations below. Test whether VARIABLE
14512 is aligned to VALUE bytes: emit VARIABLE & VALUE into a fresh temporary
14513 and a conditional jump, taken when the result is zero (i.e. aligned),
14514 to a newly created label which is returned to the caller. EPILOGUE
14515 selects the branch-probability hint attached to the jump. */
14514 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14516 rtx label = gen_label_rtx ();
14517 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14518 if (GET_MODE (variable) == DImode
14519 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14521 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14522 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Epilogue tests are 50/50; prologue alignment tests are predicted
   taken (90%) since misalignment is the uncommon case.  */
14525 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14527 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14531 /* Decrement COUNTREG by VALUE (the add of -VALUE below), using the
14532 add pattern matching COUNTREG's mode. */
14533 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14535 if (GET_MODE (countreg) == DImode)
14536 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14538 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14541 /* Zero extend possibly SImode EXP to Pmode register. A VOIDmode EXP
14542 (e.g. a constant) is simply forced into a Pmode register; an EXP
14543 already in Pmode is copied to a fresh register. */
14543 ix86_zero_extend_to_Pmode (rtx exp)
14546 if (GET_MODE (exp) == VOIDmode)
14547 return force_reg (Pmode, exp);
14548 if (GET_MODE (exp) == Pmode)
14549 return copy_to_mode_reg (Pmode, exp);
14550 r = gen_reg_rtx (Pmode);
14551 emit_insn (gen_zero_extendsidi2 (r, exp));
14555 /* Divide COUNTREG by SCALE (a power of two): constants are divided at
14556 compile time, otherwise a logical right shift by log2(SCALE) is
14557 emitted. */
14557 scale_counter (rtx countreg, int scale)
14560 rtx piece_size_mask;
/* NOTE(review): piece_size_mask is set below but its use is not visible
   in this excerpt -- confirm against the full function.  */
14564 if (CONST_INT_P (countreg))
14565 return GEN_INT (INTVAL (countreg) / scale);
14566 gcc_assert (REG_P (countreg));
14568 piece_size_mask = GEN_INT (scale - 1);
14569 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14570 GEN_INT (exact_log2 (scale)),
14571 NULL, 1, OPTAB_DIRECT);
14575 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14576 DImode for constant loop counts: DImode is only required on 64-bit
14577 targets when the constant does not fit in 32 bits. */
14578 static enum machine_mode
14579 counter_mode (rtx count_exp)
14581 if (GET_MODE (count_exp) != VOIDmode)
14582 return GET_MODE (count_exp);
14583 if (GET_CODE (count_exp) != CONST_INT)
14585 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14590 /* When SRCPTR is non-NULL, output simple loop to move memory
14591 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14592 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14593 equivalent loop to set memory by VALUE (supposed to be in MODE).
14595 The size is rounded down to whole number of chunk size moved at once.
14596 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14600 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14601 rtx destptr, rtx srcptr, rtx value,
14602 rtx count, enum machine_mode mode, int unroll,
14605 rtx out_label, top_label, iter, tmp;
14606 enum machine_mode iter_mode = counter_mode (count);
14607 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14608 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14614 top_label = gen_label_rtx ();
14615 out_label = gen_label_rtx ();
14616 iter = gen_reg_rtx (iter_mode);
/* SIZE is COUNT rounded down to a multiple of the unrolled chunk.  */
14618 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14619 NULL, 1, OPTAB_DIRECT);
14620 /* Those two should combine. */
14621 if (piece_size == const1_rtx)
14623 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14625 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14627 emit_move_insn (iter, const0_rtx);
14629 emit_label (top_label);
/* Address each chunk as ptr + iter so the induction variable is shared
   between source and destination.  */
14631 tmp = convert_modes (Pmode, iter_mode, iter, true);
14632 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14633 destmem = change_address (destmem, mode, x_addr);
14637 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14638 srcmem = change_address (srcmem, mode, y_addr);
14640 /* When unrolling for chips that reorder memory reads and writes,
14641 we can save registers by using single temporary.
14642 Also using 4 temporaries is overkill in 32bit mode. */
14643 if (!TARGET_64BIT && 0)
14645 for (i = 0; i < unroll; i++)
14650 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14652 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14654 emit_move_insn (destmem, srcmem);
/* Default copy path: load all UNROLL chunks into temporaries first,
   then store them, to decouple the reads from the writes.  */
14660 gcc_assert (unroll <= 4);
14661 for (i = 0; i < unroll; i++)
14663 tmpreg[i] = gen_reg_rtx (mode);
14667 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14669 emit_move_insn (tmpreg[i], srcmem);
14671 for (i = 0; i < unroll; i++)
14676 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14678 emit_move_insn (destmem, tmpreg[i]);
/* Memset path (SRCPTR == NULL): store VALUE into each chunk.  */
14683 for (i = 0; i < unroll; i++)
14687 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14688 emit_move_insn (destmem, value);
14691 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14692 true, OPTAB_LIB_WIDEN);
14694 emit_move_insn (iter, tmp);
14696 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Use EXPECTED_SIZE (when known) to derive the loop-back probability;
   otherwise assume the loop iterates a few times (80%).  */
14698 if (expected_size != -1)
14700 expected_size /= GET_MODE_SIZE (mode) * unroll;
14701 if (expected_size == 0)
14703 else if (expected_size > REG_BR_PROB_BASE)
14704 predict_jump (REG_BR_PROB_BASE - 1);
14706 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14709 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the live pointers past the region just processed.  */
14710 iter = ix86_zero_extend_to_Pmode (iter);
14711 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14712 true, OPTAB_LIB_WIDEN);
14713 if (tmp != destptr)
14714 emit_move_insn (destptr, tmp);
14717 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14718 true, OPTAB_LIB_WIDEN);
14720 emit_move_insn (srcptr, tmp);
14722 emit_label (out_label);
14725 /* Output "rep; mov" instruction.
14726 Arguments have same meaning as for previous function. DESTEXP/SRCEXP
14727 describe the final pointer values (ptr + countreg * piece size) for
14728 the rep_mov pattern. */
14728 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14729 rtx destptr, rtx srcptr,
14731 enum machine_mode mode)
14737 /* If the size is known, it is shorter to use rep movs. */
14738 if (mode == QImode && CONST_INT_P (count)
14739 && !(INTVAL (count) & 3))
/* Retype both MEMs as BLKmode at the current pointers so aliasing info
   covers the whole block being moved.  */
14742 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14743 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14744 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14745 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14746 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14747 if (mode != QImode)
14749 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14750 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14751 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14752 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14753 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14754 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14758 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14759 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14761 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14765 /* Output "rep; stos" instruction.
14766 Arguments have same meaning as for previous function. VALUE is
14767 narrowed to MODE and forced into a register (the implicit rax/eax
14768 operand of stos). */
14768 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14770 enum machine_mode mode)
14775 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14776 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14777 value = force_reg (mode, gen_lowpart (mode, value));
14778 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP is the final destination pointer: ptr + count * piece size.  */
14779 if (mode != QImode)
14781 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14782 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14783 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14786 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14787 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one strmov (move with pointer auto-update) of MODE, copying from
   SRCMEM+OFFSET to DESTMEM+OFFSET and advancing SRCPTR/DESTPTR.  */
14791 emit_strmov (rtx destmem, rtx srcmem,
14792 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14794 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14795 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14796 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14799 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14801 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14802 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Known count: emit straight-line moves for each set bit of the residual
   byte count, largest piece first.  */
14805 if (CONST_INT_P (count))
14807 HOST_WIDE_INT countval = INTVAL (count);
14810 if ((countval & 0x10) && max_size > 16)
14814 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14815 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14818 gcc_unreachable ();
14821 if ((countval & 0x08) && max_size > 8)
14824 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14827 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14828 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14832 if ((countval & 0x04) && max_size > 4)
14834 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14837 if ((countval & 0x02) && max_size > 2)
14839 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14842 if ((countval & 0x01) && max_size > 1)
14844 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large unknown residue: mask the count and fall back to a byte loop.  */
14851 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14852 count, 1, OPTAB_DIRECT);
14853 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14854 count, QImode, 1, 4);
14858 /* When there are stringops, we can cheaply increase dest and src pointers.
14859 Otherwise we save code size by maintaining offset (zero is readily
14860 available from preceding rep operation) and using x86 addressing modes.
14862 if (TARGET_SINGLE_STRINGOP)
14866 rtx label = ix86_expand_aligntest (count, 4, true);
14867 src = change_address (srcmem, SImode, srcptr);
14868 dest = change_address (destmem, SImode, destptr);
14869 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14870 emit_label (label);
14871 LABEL_NUSES (label) = 1;
14875 rtx label = ix86_expand_aligntest (count, 2, true);
14876 src = change_address (srcmem, HImode, srcptr);
14877 dest = change_address (destmem, HImode, destptr);
14878 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14879 emit_label (label);
14880 LABEL_NUSES (label) = 1;
14884 rtx label = ix86_expand_aligntest (count, 1, true);
14885 src = change_address (srcmem, QImode, srcptr);
14886 dest = change_address (destmem, QImode, destptr);
14887 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14888 emit_label (label);
14889 LABEL_NUSES (label) = 1;
/* No single-stringop path: keep a running OFFSET register and use
   base+offset addressing instead of bumping both pointers.  */
14894 rtx offset = force_reg (Pmode, const0_rtx);
14899 rtx label = ix86_expand_aligntest (count, 4, true);
14900 src = change_address (srcmem, SImode, srcptr);
14901 dest = change_address (destmem, SImode, destptr);
14902 emit_move_insn (dest, src);
14903 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14904 true, OPTAB_LIB_WIDEN);
14906 emit_move_insn (offset, tmp);
14907 emit_label (label);
14908 LABEL_NUSES (label) = 1;
14912 rtx label = ix86_expand_aligntest (count, 2, true);
14913 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14914 src = change_address (srcmem, HImode, tmp);
14915 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14916 dest = change_address (destmem, HImode, tmp);
14917 emit_move_insn (dest, src);
14918 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14919 true, OPTAB_LIB_WIDEN);
14921 emit_move_insn (offset, tmp);
14922 emit_label (label);
14923 LABEL_NUSES (label) = 1;
14927 rtx label = ix86_expand_aligntest (count, 1, true);
14928 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14929 src = change_address (srcmem, QImode, tmp);
14930 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14931 dest = change_address (destmem, QImode, tmp);
14932 emit_move_insn (dest, src);
14933 emit_label (label);
14934 LABEL_NUSES (label) = 1;
14939 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.
14940 Masks COUNT down to the residue and delegates to the generic byte-wise
14941 set loop with VALUE narrowed to QImode. */
14941 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14942 rtx count, int max_size)
14945 expand_simple_binop (counter_mode (count), AND, count,
14946 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14947 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14948 gen_lowpart (QImode, value), count, QImode,
14952 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.
14953 VALUE is expected to be a mode-wide promoted fill value; narrower
14954 pieces use gen_lowpart of it. */
14954 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Known count: straight-line stores for each set bit of the residue.  */
14958 if (CONST_INT_P (count))
14960 HOST_WIDE_INT countval = INTVAL (count);
14963 if ((countval & 0x10) && max_size > 16)
14967 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14968 emit_insn (gen_strset (destptr, dest, value));
14969 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14970 emit_insn (gen_strset (destptr, dest, value));
14973 gcc_unreachable ();
14976 if ((countval & 0x08) && max_size > 8)
14980 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14981 emit_insn (gen_strset (destptr, dest, value));
14985 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14986 emit_insn (gen_strset (destptr, dest, value));
14987 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14988 emit_insn (gen_strset (destptr, dest, value));
14992 if ((countval & 0x04) && max_size > 4)
14994 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14995 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14998 if ((countval & 0x02) && max_size > 2)
15000 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15001 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15004 if ((countval & 0x01) && max_size > 1)
15006 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15007 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large unknown residue: fall back to the byte-set loop.  */
15014 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Unknown count: test each residual size bit and conditionally store.  */
15019 rtx label = ix86_expand_aligntest (count, 16, true);
15022 dest = change_address (destmem, DImode, destptr);
15023 emit_insn (gen_strset (destptr, dest, value));
15024 emit_insn (gen_strset (destptr, dest, value));
15028 dest = change_address (destmem, SImode, destptr);
15029 emit_insn (gen_strset (destptr, dest, value));
15030 emit_insn (gen_strset (destptr, dest, value));
15031 emit_insn (gen_strset (destptr, dest, value));
15032 emit_insn (gen_strset (destptr, dest, value));
15034 emit_label (label);
15035 LABEL_NUSES (label) = 1;
15039 rtx label = ix86_expand_aligntest (count, 8, true);
15042 dest = change_address (destmem, DImode, destptr);
15043 emit_insn (gen_strset (destptr, dest, value));
15047 dest = change_address (destmem, SImode, destptr);
15048 emit_insn (gen_strset (destptr, dest, value));
15049 emit_insn (gen_strset (destptr, dest, value));
15051 emit_label (label);
15052 LABEL_NUSES (label) = 1;
15056 rtx label = ix86_expand_aligntest (count, 4, true);
15057 dest = change_address (destmem, SImode, destptr);
15058 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15059 emit_label (label);
15060 LABEL_NUSES (label) = 1;
15064 rtx label = ix86_expand_aligntest (count, 2, true);
15065 dest = change_address (destmem, HImode, destptr);
15066 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15067 emit_label (label);
15068 LABEL_NUSES (label) = 1;
15072 rtx label = ix86_expand_aligntest (count, 1, true);
15073 dest = change_address (destmem, QImode, destptr);
15074 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15075 emit_label (label);
15076 LABEL_NUSES (label) = 1;
15080 /* Copy enough from SRC to DEST to align DEST, known to be aligned to
15081 ALIGN, up to DESIRED_ALIGNMENT. Each step copies one piece only when
15082 DESTPTR is misaligned at that size, and decrements COUNT accordingly. */
15083 expand_movmem_prologue (rtx destmem, rtx srcmem,
15084 rtx destptr, rtx srcptr, rtx count,
15085 int align, int desired_alignment)
15087 if (align <= 1 && desired_alignment > 1)
15089 rtx label = ix86_expand_aligntest (destptr, 1, false);
15090 srcmem = change_address (srcmem, QImode, srcptr);
15091 destmem = change_address (destmem, QImode, destptr);
15092 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15093 ix86_adjust_counter (count, 1);
15094 emit_label (label);
15095 LABEL_NUSES (label) = 1;
15097 if (align <= 2 && desired_alignment > 2)
15099 rtx label = ix86_expand_aligntest (destptr, 2, false);
15100 srcmem = change_address (srcmem, HImode, srcptr);
15101 destmem = change_address (destmem, HImode, destptr);
15102 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15103 ix86_adjust_counter (count, 2);
15104 emit_label (label);
15105 LABEL_NUSES (label) = 1;
15107 if (align <= 4 && desired_alignment > 4)
15109 rtx label = ix86_expand_aligntest (destptr, 4, false);
15110 srcmem = change_address (srcmem, SImode, srcptr);
15111 destmem = change_address (destmem, SImode, destptr);
15112 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15113 ix86_adjust_counter (count, 4);
15114 emit_label (label);
15115 LABEL_NUSES (label) = 1;
15117 gcc_assert (desired_alignment <= 8);
15120 /* Set enough of DEST to align DEST, known to be aligned to ALIGN, up to
15121 DESIRED_ALIGNMENT. Mirrors expand_movmem_prologue but stores pieces
15122 of VALUE instead of copying, decrementing COUNT for each piece. */
15123 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15124 int align, int desired_alignment)
15126 if (align <= 1 && desired_alignment > 1)
15128 rtx label = ix86_expand_aligntest (destptr, 1, false);
15129 destmem = change_address (destmem, QImode, destptr);
15130 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15131 ix86_adjust_counter (count, 1);
15132 emit_label (label);
15133 LABEL_NUSES (label) = 1;
15135 if (align <= 2 && desired_alignment > 2)
15137 rtx label = ix86_expand_aligntest (destptr, 2, false);
15138 destmem = change_address (destmem, HImode, destptr);
15139 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15140 ix86_adjust_counter (count, 2);
15141 emit_label (label);
15142 LABEL_NUSES (label) = 1;
15144 if (align <= 4 && desired_alignment > 4)
15146 rtx label = ix86_expand_aligntest (destptr, 4, false);
15147 destmem = change_address (destmem, SImode, destptr);
15148 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15149 ix86_adjust_counter (count, 4);
15150 emit_label (label);
15151 LABEL_NUSES (label) = 1;
15153 gcc_assert (desired_alignment <= 8);
15156 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15157 static enum stringop_alg
15158 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15159 int *dynamic_check)
15161 const struct stringop_algs * algs;
15162 /* Algorithms using the rep prefix want at least edi and ecx;
15163 additionally, memset wants eax and memcpy wants esi. Don't
15164 consider such algorithms if the user has appropriated those
15165 registers for their own purposes. */
15166 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15168 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15170 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15171 || (alg != rep_prefix_1_byte \
15172 && alg != rep_prefix_4_byte \
15173 && alg != rep_prefix_8_byte))
15175 *dynamic_check = -1;
15177 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15179 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15180 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15181 return stringop_alg;
15182 /* rep; movq or rep; movl is the smallest variant. */
15183 else if (optimize_size)
15185 if (!count || (count & 3))
15186 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15188 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15190 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15192 else if (expected_size != -1 && expected_size < 4)
15193 return loop_1_byte;
15194 else if (expected_size != -1)
15197 enum stringop_alg alg = libcall;
15198 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15200 /* We get here if the algorithms that were not libcall-based
15201 were rep-prefix based and we are unable to use rep prefixes
15202 based on global register usage. Break out of the loop and
15203 use the heuristic below. */
15204 if (algs->size[i].max == 0)
15206 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15208 enum stringop_alg candidate = algs->size[i].alg;
15210 if (candidate != libcall && ALG_USABLE_P (candidate))
15212 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15213 last non-libcall inline algorithm. */
15214 if (TARGET_INLINE_ALL_STRINGOPS)
15216 /* When the current size is best to be copied by a libcall,
15217 but we are still forced to inline, run the heuristic below
15218 that will pick code for medium sized blocks. */
15219 if (alg != libcall)
15223 else if (ALG_USABLE_P (candidate))
15227 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15229 /* When asked to inline the call anyway, try to pick meaningful choice.
15230 We look for maximal size of block that is faster to copy by hand and
15231 take blocks of at most of that size guessing that average size will
15232 be roughly half of the block.
15234 If this turns out to be bad, we might simply specify the preferred
15235 choice in ix86_costs. */
15236 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15237 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15240 enum stringop_alg alg;
15242 bool any_alg_usable_p = true;
15244 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15246 enum stringop_alg candidate = algs->size[i].alg;
15247 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15249 if (candidate != libcall && candidate
15250 && ALG_USABLE_P (candidate))
15251 max = algs->size[i].max;
15253 /* If there aren't any usable algorithms, then recursing on
15254 smaller sizes isn't going to find anything. Just return the
15255 simple byte-at-a-time copy loop. */
15256 if (!any_alg_usable_p)
15258 /* Pick something reasonable. */
15259 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15260 *dynamic_check = 128;
15261 return loop_1_byte;
15265 alg = decide_alg (count, max / 2, memset, dynamic_check);
15266 gcc_assert (*dynamic_check == -1);
15267 gcc_assert (alg != libcall);
15268 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15269 *dynamic_check = max;
15272 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15273 #undef ALG_USABLE_P
15276 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15277 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Chooses the destination alignment the prologue should establish for the
   algorithm ALG chosen by decide_alg.
   NOTE(review): this listing dropped the switch header, several case
   labels, break statements and brace lines of the original function;
   the remaining tokens are unchanged.  */
15279 decide_alignment (int align,
15280 enum stringop_alg alg,
15283 int desired_align = 0;
15287 gcc_unreachable ();
15289 case unrolled_loop:
/* Word-sized chunks want word alignment.  */
15290 desired_align = GET_MODE_SIZE (Pmode);
15292 case rep_prefix_8_byte:
15295 case rep_prefix_4_byte:
15296 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15297 copying whole cacheline at once. */
15298 if (TARGET_PENTIUMPRO)
15303 case rep_prefix_1_byte:
15304 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15305 copying whole cacheline at once. */
15306 if (TARGET_PENTIUMPRO)
/* Never ask for less than the alignment we already have.  */
15320 if (desired_align < align)
15321 desired_align = align;
/* For blocks known to be tiny, an alignment prologue is not worth it.  */
15322 if (expected_size != -1 && expected_size < 4)
15323 desired_align = align;
15324 return desired_align;
/* Return the smallest power of 2 strictly greater than VAL.
   Used to round EPILOGUE_SIZE_NEEDED up so the epilogue mask
   (COUNT & (power-of-2 - 1)) is well formed.
   NOTE(review): the loop body was dropped from this listing and has
   been reconstructed from the documented contract.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
15337 /* Expand string move (memcpy) operation. Use i386 string operations when
15338 profitable. expand_setmem contains similar code. The code depends upon
15339 architecture, block size and alignment, but always has the same
15342 1) Prologue guard: Conditional that jumps up to epilogues for small
15343 blocks that can be handled by epilogue alone. This is faster but
15344 also needed for correctness, since prologue assume the block is larger
15345 than the desired alignment.
15347 Optional dynamic check for size and libcall for large
15348 blocks is emitted here too, with -minline-stringops-dynamically.
15350 2) Prologue: copy first few bytes in order to get destination aligned
15351 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15352 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15353 We emit either a jump tree on power of two sized blocks, or a byte loop.
15355 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15356 with specified algorithm.
15358 4) Epilogue: code copying tail of the block that is too small to be
15359 handled by main body (or up to size guarded by prologue guard). */
/* NOTE(review): this listing dropped the return-type line, local
   declarations such as destreg/srcreg/label/tmp/dynamic_check, braces,
   switch headers, case labels and break statements; the tokens below
   are otherwise unchanged.  */
15362 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15363 rtx expected_align_exp, rtx expected_size_exp)
15369 rtx jump_around_label = NULL;
15370 HOST_WIDE_INT align = 1;
15371 unsigned HOST_WIDE_INT count = 0;
15372 HOST_WIDE_INT expected_size = -1;
15373 int size_needed = 0, epilogue_size_needed;
15374 int desired_align = 0;
15375 enum stringop_alg alg;
/* Collect compile-time knowledge: alignment, count and expected size,
   each only when the corresponding operand is a CONST_INT.  */
15378 if (CONST_INT_P (align_exp))
15379 align = INTVAL (align_exp);
15380 /* i386 can do misaligned access on reasonably increased cost. */
15381 if (CONST_INT_P (expected_align_exp)
15382 && INTVAL (expected_align_exp) > align)
15383 align = INTVAL (expected_align_exp);
15384 if (CONST_INT_P (count_exp))
15385 count = expected_size = INTVAL (count_exp);
15386 if (CONST_INT_P (expected_size_exp) && count == 0)
15387 expected_size = INTVAL (expected_size_exp);
15389 /* Make sure we don't need to care about overflow later on. */
15390 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15393 /* Step 0: Decide on preferred algorithm, desired alignment and
15394 size of chunks to be copied by main loop. */
15396 alg = decide_alg (count, expected_size, false, &dynamic_check);
15397 desired_align = decide_alignment (align, alg, expected_size);
15399 if (!TARGET_ALIGN_STRINGOPS)
15400 align = desired_align;
15402 if (alg == libcall)
15404 gcc_assert (alg != no_stringop);
15406 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15407 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15408 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size for the main loop (switch on ALG; case
   labels for loop/loop_1_byte and the rep-prefix byte sizes were
   dropped from this listing).  */
15413 gcc_unreachable ();
15415 size_needed = GET_MODE_SIZE (Pmode);
15417 case unrolled_loop:
15418 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15420 case rep_prefix_8_byte:
15423 case rep_prefix_4_byte:
15426 case rep_prefix_1_byte:
15432 epilogue_size_needed = size_needed;
15434 /* Step 1: Prologue guard. */
15436 /* Alignment code needs count to be in register. */
15437 if (CONST_INT_P (count_exp) && desired_align > align)
15438 count_exp = force_reg (counter_mode (count_exp), count_exp);
15439 gcc_assert (desired_align >= 1 && align >= 1);
15441 /* Ensure that alignment prologue won't copy past end of block. */
15442 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15444 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15445 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15446 Make sure it is power of 2. */
15447 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15449 if (CONST_INT_P (count_exp))
15451 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* Runtime count: branch to the epilogue for blocks the epilogue alone
   can handle, with a branch-probability hint.  */
15456 label = gen_label_rtx ();
15457 emit_cmp_and_jump_insns (count_exp,
15458 GEN_INT (epilogue_size_needed),
15459 LTU, 0, counter_mode (count_exp), 1, label);
15460 if (expected_size == -1 || expected_size < epilogue_size_needed)
15461 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15463 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15467 /* Emit code to decide on runtime whether library call or inline should be
15469 if (dynamic_check != -1)
15471 if (CONST_INT_P (count_exp))
15473 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15475 emit_block_move_via_libcall (dst, src, count_exp, false);
15476 count_exp = const0_rtx;
15482 rtx hot_label = gen_label_rtx ();
15483 jump_around_label = gen_label_rtx ();
15484 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15485 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15486 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15487 emit_block_move_via_libcall (dst, src, count_exp, false);
15488 emit_jump (jump_around_label);
15489 emit_label (hot_label);
15493 /* Step 2: Alignment prologue. */
15495 if (desired_align > align)
15497 /* Except for the first move in epilogue, we no longer know
15498 constant offset in aliasing info. It don't seems to worth
15499 the pain to maintain it for the first move, so throw away
15501 src = change_address (src, BLKmode, srcreg);
15502 dst = change_address (dst, BLKmode, destreg);
15503 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
/* When the main loop copies byte-sized chunks the small-block guard
   can land directly here.  */
15506 if (label && size_needed == 1)
15508 emit_label (label);
15509 LABEL_NUSES (label) = 1;
15513 /* Step 3: Main loop. */
/* Switch on ALG again; libcall/no_stringop are unreachable here.  */
15519 gcc_unreachable ();
15521 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15522 count_exp, QImode, 1, expected_size);
15525 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15526 count_exp, Pmode, 1, expected_size);
15528 case unrolled_loop:
15529 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15530 registers for 4 temporaries anyway. */
15531 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15532 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15535 case rep_prefix_8_byte:
15536 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15539 case rep_prefix_4_byte:
15540 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15543 case rep_prefix_1_byte:
15544 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15548 /* Adjust properly the offset of src and dest memory for aliasing. */
15549 if (CONST_INT_P (count_exp))
15551 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15552 (count / size_needed) * size_needed);
15553 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15554 (count / size_needed) * size_needed);
15558 src = change_address (src, BLKmode, srcreg);
15559 dst = change_address (dst, BLKmode, destreg);
15562 /* Step 4: Epilogue to copy the remaining bytes. */
15566 /* When the main loop is done, COUNT_EXP might hold original count,
15567 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15568 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15569 bytes. Compensate if needed. */
15571 if (size_needed < epilogue_size_needed)
15574 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15575 GEN_INT (size_needed - 1), count_exp, 1,
15577 if (tmp != count_exp)
15578 emit_move_insn (count_exp, tmp);
15580 emit_label (label);
15581 LABEL_NUSES (label) = 1;
15584 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15585 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15586 epilogue_size_needed);
15587 if (jump_around_label)
15588 emit_label (jump_around_label);
15592 /* Helper function for memcpy. For QImode value 0xXY produce
15593 0xXYXYXYXY of wide specified by MODE. This is essentially
15594 a * 0x10101010, but we can do slightly better than
15595 synth_mult by unwinding the sequence by hand on CPUs with
15598 promote_duplicated_reg (enum machine_mode mode, rtx val)
15600 enum machine_mode valmode = GET_MODE (val);
15602 int nops = mode == DImode ? 3 : 2;
15604 gcc_assert (mode == SImode || mode == DImode);
15605 if (val == const0_rtx)
15606 return copy_to_mode_reg (mode, const0_rtx);
15607 if (CONST_INT_P (val))
15609 HOST_WIDE_INT v = INTVAL (val) & 255;
15613 if (mode == DImode)
15614 v |= (v << 16) << 16;
15615 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15618 if (valmode == VOIDmode)
15620 if (valmode != QImode)
15621 val = gen_lowpart (QImode, val);
15622 if (mode == QImode)
15624 if (!TARGET_PARTIAL_REG_STALL)
15626 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15627 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15628 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15629 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15631 rtx reg = convert_modes (mode, QImode, val, true);
15632 tmp = promote_duplicated_reg (mode, const1_rtx);
15633 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15638 rtx reg = convert_modes (mode, QImode, val, true);
15640 if (!TARGET_PARTIAL_REG_STALL)
15641 if (mode == SImode)
15642 emit_insn (gen_movsi_insv_1 (reg, reg));
15644 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15647 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15648 NULL, 1, OPTAB_DIRECT);
15650 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15652 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15653 NULL, 1, OPTAB_DIRECT);
15654 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15655 if (mode == SImode)
15657 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15658 NULL, 1, OPTAB_DIRECT);
15659 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15664 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15665 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15666 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest promotion mode the setmem main loop or its alignment
   prologue can store: DImode (64-bit targets only — the TARGET_64BIT
   test line was dropped from this listing), then SImode, then HImode,
   else VAL unchanged.  */
15668 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15673 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15674 promoted_val = promote_duplicated_reg (DImode, val);
15675 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15676 promoted_val = promote_duplicated_reg (SImode, val);
15677 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15678 promoted_val = promote_duplicated_reg (HImode, val);
15680 promoted_val = val;
15682 return promoted_val;
15685 /* Expand string clear operation (bzero). Use i386 string operations when
15686 profitable. See expand_movmem comment for explanation of individual
15687 steps performed. */
/* NOTE(review): memset counterpart of ix86_expand_movmem above; this
   listing dropped the return-type line, locals such as destreg/label/
   tmp/dynamic_check, braces, switch headers, case labels and break
   statements — remaining tokens are unchanged.  */
15689 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15690 rtx expected_align_exp, rtx expected_size_exp)
15695 rtx jump_around_label = NULL;
15696 HOST_WIDE_INT align = 1;
15697 unsigned HOST_WIDE_INT count = 0;
15698 HOST_WIDE_INT expected_size = -1;
15699 int size_needed = 0, epilogue_size_needed;
15700 int desired_align = 0;
15701 enum stringop_alg alg;
15702 rtx promoted_val = NULL;
/* Set when the epilogue cannot reuse a promoted VAL and must fall back
   to a byte loop.  */
15703 bool force_loopy_epilogue = false;
15706 if (CONST_INT_P (align_exp))
15707 align = INTVAL (align_exp);
15708 /* i386 can do misaligned access on reasonably increased cost. */
15709 if (CONST_INT_P (expected_align_exp)
15710 && INTVAL (expected_align_exp) > align)
15711 align = INTVAL (expected_align_exp);
15712 if (CONST_INT_P (count_exp))
15713 count = expected_size = INTVAL (count_exp);
15714 if (CONST_INT_P (expected_size_exp) && count == 0)
15715 expected_size = INTVAL (expected_size_exp);
15717 /* Make sure we don't need to care about overflow later on. */
15718 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15721 /* Step 0: Decide on preferred algorithm, desired alignment and
15722 size of chunks to be copied by main loop. */
15724 alg = decide_alg (count, expected_size, true, &dynamic_check);
15725 desired_align = decide_alignment (align, alg, expected_size);
15727 if (!TARGET_ALIGN_STRINGOPS)
15728 align = desired_align;
15730 if (alg == libcall)
15732 gcc_assert (alg != no_stringop);
15734 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15735 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* Per-algorithm chunk size (switch on ALG; some case labels dropped
   from this listing).  */
15740 gcc_unreachable ();
15742 size_needed = GET_MODE_SIZE (Pmode);
15744 case unrolled_loop:
15745 size_needed = GET_MODE_SIZE (Pmode) * 4;
15747 case rep_prefix_8_byte:
15750 case rep_prefix_4_byte:
15753 case rep_prefix_1_byte:
15758 epilogue_size_needed = size_needed;
15760 /* Step 1: Prologue guard. */
15762 /* Alignment code needs count to be in register. */
15763 if (CONST_INT_P (count_exp) && desired_align > align)
15765 enum machine_mode mode = SImode;
15766 if (TARGET_64BIT && (count & ~0xffffffff))
15768 count_exp = force_reg (mode, count_exp);
15770 /* Do the cheap promotion to allow better CSE across the
15771 main loop and epilogue (ie one load of the big constant in the
15772 front of all code. */
15773 if (CONST_INT_P (val_exp))
15774 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15775 desired_align, align);
15776 /* Ensure that alignment prologue won't copy past end of block. */
15777 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15779 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15780 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15781 Make sure it is power of 2. */
15782 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15784 /* To improve performance of small blocks, we jump around the VAL
15785 promoting mode. This mean that if the promoted VAL is not constant,
15786 we might not use it in the epilogue and have to use byte
15788 if (epilogue_size_needed > 2 && !promoted_val)
15789 force_loopy_epilogue = true;
15790 label = gen_label_rtx ();
15791 emit_cmp_and_jump_insns (count_exp,
15792 GEN_INT (epilogue_size_needed),
15793 LTU, 0, counter_mode (count_exp), 1, label);
15794 if (GET_CODE (count_exp) == CONST_INT)
15796 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15797 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15799 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Optional runtime size check that diverts large blocks to the
   library memset (-minline-stringops-dynamically).  */
15801 if (dynamic_check != -1)
15803 rtx hot_label = gen_label_rtx ();
15804 jump_around_label = gen_label_rtx ();
15805 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15806 LEU, 0, counter_mode (count_exp), 1, hot_label);
15807 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15808 set_storage_via_libcall (dst, count_exp, val_exp, false);
15809 emit_jump (jump_around_label);
15810 emit_label (hot_label);
15813 /* Step 2: Alignment prologue. */
15815 /* Do the expensive promotion once we branched off the small blocks. */
15817 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15818 desired_align, align);
15819 gcc_assert (desired_align >= 1 && align >= 1);
15821 if (desired_align > align)
15823 /* Except for the first move in epilogue, we no longer know
15824 constant offset in aliasing info. It don't seems to worth
15825 the pain to maintain it for the first move, so throw away
15827 dst = change_address (dst, BLKmode, destreg);
15828 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15831 if (label && size_needed == 1)
15833 emit_label (label);
15834 LABEL_NUSES (label) = 1;
15838 /* Step 3: Main loop. */
/* Switch on ALG; libcall/no_stringop are unreachable here.  */
15844 gcc_unreachable ();
15846 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15847 count_exp, QImode, 1, expected_size);
15850 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15851 count_exp, Pmode, 1, expected_size);
15853 case unrolled_loop:
15854 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15855 count_exp, Pmode, 4, expected_size);
15857 case rep_prefix_8_byte:
15858 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15861 case rep_prefix_4_byte:
15862 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15865 case rep_prefix_1_byte:
15866 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15870 /* Adjust properly the offset of src and dest memory for aliasing. */
15871 if (CONST_INT_P (count_exp))
15872 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15873 (count / size_needed) * size_needed);
15875 dst = change_address (dst, BLKmode, destreg);
15877 /* Step 4: Epilogue to copy the remaining bytes. */
15881 /* When the main loop is done, COUNT_EXP might hold original count,
15882 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15883 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15884 bytes. Compensate if needed. */
15886 if (size_needed < desired_align - align)
15889 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15890 GEN_INT (size_needed - 1), count_exp, 1,
15892 size_needed = desired_align - align + 1;
15893 if (tmp != count_exp)
15894 emit_move_insn (count_exp, tmp);
15896 emit_label (label);
15897 LABEL_NUSES (label) = 1;
15899 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* Without a promoted value the epilogue must re-store byte by byte.  */
15901 if (force_loopy_epilogue)
15902 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15905 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15908 if (jump_around_label)
15909 emit_label (jump_around_label);
15913 /* Expand the appropriate insns for doing strlen if not just doing
15916 out = result, initialized with the start address
15917 align_rtx = alignment of the address.
15918 scratch = scratch register, initialized with the startaddress when
15919 not aligned, otherwise undefined
15921 This is just the body. It needs the initializations mentioned above and
15922 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): this listing dropped the return-type line, several
   local declarations (align, mem, tmp, cmp), braces and a few emitted
   statements; remaining tokens are unchanged.  */
15925 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15929 rtx align_2_label = NULL_RTX;
15930 rtx align_3_label = NULL_RTX;
15931 rtx align_4_label = gen_label_rtx ();
15932 rtx end_0_label = gen_label_rtx ();
15934 rtx tmpreg = gen_reg_rtx (SImode);
15935 rtx scratch = gen_reg_rtx (SImode);
15939 if (CONST_INT_P (align_rtx))
15940 align = INTVAL (align_rtx);
15942 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15944 /* Is there a known alignment and is it less than 4? */
15947 rtx scratch1 = gen_reg_rtx (Pmode);
15948 emit_move_insn (scratch1, out);
15949 /* Is there a known alignment and is it not 2? */
15952 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15953 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15955 /* Leave just the 3 lower bits. */
15956 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15957 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on the low two address bits: 0 -> already 4-aligned,
   2 -> two bytes to check, 1/3 -> up to three bytes to check.  */
15959 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15960 Pmode, 1, align_4_label);
15961 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15962 Pmode, 1, align_2_label);
15963 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15964 Pmode, 1, align_3_label);
15968 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15969 check if is aligned to 4 - byte. */
15971 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15972 NULL_RTX, 0, OPTAB_WIDEN);
15974 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15975 Pmode, 1, align_4_label);
15978 mem = change_address (src, QImode, out);
15980 /* Now compare the bytes. */
15982 /* Compare the first n unaligned byte on a byte per byte basis. */
15983 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15984 QImode, 1, end_0_label);
15986 /* Increment the address. */
15988 emit_insn (gen_adddi3 (out, out, const1_rtx));
15990 emit_insn (gen_addsi3 (out, out, const1_rtx));
15992 /* Not needed with an alignment of 2 */
15995 emit_label (align_2_label);
15997 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16001 emit_insn (gen_adddi3 (out, out, const1_rtx));
16003 emit_insn (gen_addsi3 (out, out, const1_rtx));
16005 emit_label (align_3_label);
16008 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16012 emit_insn (gen_adddi3 (out, out, const1_rtx));
16014 emit_insn (gen_addsi3 (out, out, const1_rtx));
16017 /* Generate loop to check 4 bytes at a time. It is not a good idea to
16018 align this loop. It gives only huge programs, but does not help to
16020 emit_label (align_4_label);
16022 mem = change_address (src, SImode, out);
16023 emit_move_insn (scratch, mem);
16025 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
16027 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
16029 /* This formula yields a nonzero result iff one of the bytes is zero.
16030 This saves three branches inside loop and many cycles. */
/* Classic (word - 0x01010101) & ~word & 0x80808080 zero-byte test.  */
16032 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16033 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16034 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch))
16035 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16036 gen_int_mode (0x80808080, SImode)));
16037 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free locating of the zero byte (cmov path).  */
16042 rtx reg = gen_reg_rtx (SImode);
16043 rtx reg2 = gen_reg_rtx (Pmode);
16044 emit_move_insn (reg, tmpreg);
16045 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16047 /* If zero is not in the first two bytes, move two bytes forward. */
16048 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16049 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16050 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16051 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16052 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16055 /* Emit lea manually to avoid clobbering of flags. */
16056 emit_insn (gen_rtx_SET (SImode, reg2,
16057 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16059 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16060 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16061 emit_insn (gen_rtx_SET (VOIDmode, out,
16062 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching variant for targets without cmov.  */
16069 rtx end_2_label = gen_label_rtx ();
16070 /* Is zero in the first two bytes? */
16072 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16073 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16074 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16075 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16076 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16078 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16079 JUMP_LABEL (tmp) = end_2_label;
16081 /* Not in the first two. Move two bytes forward. */
16082 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16084 emit_insn (gen_adddi3 (out, out, const2_rtx));
16086 emit_insn (gen_addsi3 (out, out, const2_rtx));
16088 emit_label (end_2_label);
16092 /* Avoid branch in fixing the byte. */
16093 tmpreg = gen_lowpart (QImode, tmpreg);
16094 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16095 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16097 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16099 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16101 emit_label (end_0_label);
16104 /* Expand strlen. */
/* Driver for the strlen builtin: either the unrolled SImode scan
   (ix86_expand_strlensi_unroll_1) or a repnz scasb sequence.
   NOTE(review): return-type line, some locals (unspec), braces and a
   few statements were dropped from this listing.  */
16107 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16109 rtx addr, scratch1, scratch2, scratch3, scratch4;
16111 /* The generic case of strlen expander is long. Avoid it's
16112 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16114 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16115 && !TARGET_INLINE_ALL_STRINGOPS
16117 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16120 addr = force_reg (Pmode, XEXP (src, 0));
16121 scratch1 = gen_reg_rtx (Pmode);
16123 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16126 /* Well it seems that some optimizer does not combine a call like
16127 foo(strlen(bar), strlen(bar));
16128 when the move and the subtraction is done here. It does calculate
16129 the length just once when these instructions are done inside of
16130 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16131 often used and I use one fewer register for the lifetime of
16132 output_strlen_unroll() this is better. */
16134 emit_move_insn (out, addr);
16136 ix86_expand_strlensi_unroll_1 (out, src, align);
16138 /* strlensi_unroll_1 returns the address of the zero at the end of
16139 the string, like memchr(), so compute the length by subtracting
16140 the start address. */
16142 emit_insn (gen_subdi3 (out, out, addr));
16144 emit_insn (gen_subsi3 (out, out, addr));
16150 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16151 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
/* repnz scasb path: scratch4 = -1 is the maximal count for scas.  */
16154 scratch2 = gen_reg_rtx (Pmode);
16155 scratch3 = gen_reg_rtx (Pmode);
16156 scratch4 = force_reg (Pmode, constm1_rtx);
16158 emit_move_insn (scratch3, addr);
16159 eoschar = force_reg (QImode, eoschar);
16161 src = replace_equiv_address_nv (src, scratch3);
16163 /* If .md starts supporting :P, this can be done in .md. */
16164 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16165 scratch4), UNSPEC_SCAS);
16166 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves -(len + 2) style residue; ~x - 1 recovers the length.  */
16169 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16170 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16174 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16175 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16181 /* For given symbol (function) construct code to compute address of it's PLT
16182 entry in large x86-64 PIC model. */
/* Emits: tmp = UNSPEC_PLTOFF(symbol); tmp += PIC register — giving the
   runtime PLT entry address.  Only valid for CM_LARGE_PIC (asserted).
   NOTE(review): the return-type line and trailing `return tmp;` were
   dropped from this listing.  */
16184 construct_plt_address (rtx symbol)
16186 rtx tmp = gen_reg_rtx (Pmode);
16187 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16189 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16190 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16192 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16193 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit RTL for a call: RETVAL receives the result (or is NULL for void
   calls), FNADDR is the callee MEM, CALLARG1 the argument-bytes rtx,
   CALLARG2 the 64-bit varargs SSE-register count, POP the caller-pop
   byte count, SIBCALL nonzero for tail calls.
   NOTE(review): braces and some statements were dropped from this
   listing; remaining tokens are unchanged.  */
16198 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16199 rtx callarg2 ATTRIBUTE_UNUSED,
16200 rtx pop, int sibcall)
16202 rtx use = NULL, call;
16204 if (pop == const0_rtx)
/* 64-bit ABI never uses callee-visible caller pops.  */
16206 gcc_assert (!TARGET_64BIT || !pop);
16208 if (TARGET_MACHO && !TARGET_64BIT)
16211 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16212 fnaddr = machopic_indirect_call_target (fnaddr);
16217 /* Static functions and indirect calls don't need the pic register. */
16218 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16219 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16220 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16221 use_reg (&use, pic_offset_table_rtx)
16224 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
/* x86-64 varargs: AL carries the number of SSE registers used.  */
16226 rtx al = gen_rtx_REG (QImode, AX_REG);
16227 emit_move_insn (al, callarg2);
16228 use_reg (&use, al);
16231 if (ix86_cmodel == CM_LARGE_PIC
16232 && GET_CODE (fnaddr) == MEM
16233 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16234 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16235 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16236 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16238 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16239 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16241 if (sibcall && TARGET_64BIT
16242 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
/* 64-bit sibcalls need the target in a call-clobbered register (r11).  */
16245 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16246 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16247 emit_move_insn (fnaddr, addr);
16248 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16251 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16253 call = gen_rtx_SET (VOIDmode, retval, call);
/* Caller-pop calls are a PARALLEL of the call and the SP adjustment.  */
16256 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16257 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16258 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16261 call = emit_call_insn (call);
16263 CALL_INSN_FUNCTION_USAGE (call) = use;
16267 /* Clear stack slot assignments remembered from previous functions.
16268    This is called from INIT_EXPANDERS once before RTL is emitted for each
16271 static struct machine_function *
16272 ix86_init_machine_status (void)
16274 struct machine_function *f;
/* GGC_CNEW returns zero-initialized, garbage-collected storage.  */
16276 f = GGC_CNEW (struct machine_function);
/* -1 is the "not yet computed" sentinel for the prologue/epilogue
   register-count cache.  */
16277 f->use_fast_prologue_epilogue_nregs = -1;
16278 f->tls_descriptor_call_expanded_p = 0;
16283 /* Return a MEM corresponding to a stack slot with mode MODE.
16284    Allocate a new slot if necessary.
16286    The RTL for a function can have several slots available: N is
16287    which slot to use. */
16290 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16292 struct stack_local_entry *s;
16294 gcc_assert (n < MAX_386_STACK_LOCALS);
16296 /* Virtual slot is valid only before vregs are instantiated. */
16297 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously allocated (mode, n) slot if one is cached; return a
   copy so callers may modify the rtx freely.  */
16299 for (s = ix86_stack_locals; s; s = s->next)
16300 if (s->mode == mode && s->n == n)
16301 return copy_rtx (s->rtl);
/* Not cached: allocate a GC'd cache entry and a fresh stack slot.  */
16303 s = (struct stack_local_entry *)
16304 ggc_alloc (sizeof (struct stack_local_entry));
16307 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Push onto the per-function cache list.  */
16309 s->next = ix86_stack_locals;
16310 ix86_stack_locals = s;
16314 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached across calls; GTY marks it as a GC root.  */
16316 static GTY(()) rtx ix86_tls_symbol;
16318 ix86_tls_get_addr (void)
16321 if (!ix86_tls_symbol)
/* GNU TLS uses the triple-underscore entry point, the generic one uses
   double underscore.  */
16323 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16324 (TARGET_ANY_GNU_TLS
16326 ? "___tls_get_addr"
16327 : "__tls_get_addr");
16330 return ix86_tls_symbol;
16333 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
/* Cached across calls; GTY marks it as a GC root.  */
16335 static GTY(()) rtx ix86_tls_module_base_symbol;
16337 ix86_tls_module_base (void)
16340 if (!ix86_tls_module_base_symbol)
16342 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16343 "_TLS_MODULE_BASE_");
/* Tag the symbol as global-dynamic TLS so later code treats it with the
   right TLS model.  */
16344 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16345 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16348 return ix86_tls_module_base_symbol;
16351 /* Calculate the length of the memory address in the instruction
16352    encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Returns the number of extra bytes (SIB + displacement) the addressing
   mode contributes.  NOTE(review): the `len` accumulator assignments and
   return statements are elided from this view of the file.  */
16355 memory_address_length (rtx addr)
16357 struct ix86_address parts;
16358 rtx base, index, disp;
/* Autoincrement addressing encodes no extra bytes.  */
16362 if (GET_CODE (addr) == PRE_DEC
16363 || GET_CODE (addr) == POST_INC
16364 || GET_CODE (addr) == PRE_MODIFY
16365 || GET_CODE (addr) == POST_MODIFY)
16368 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register-identity comparisons below work.  */
16371 if (parts.base && GET_CODE (parts.base) == SUBREG)
16372 parts.base = SUBREG_REG (parts.base);
16373 if (parts.index && GET_CODE (parts.index) == SUBREG)
16374 parts.index = SUBREG_REG (parts.index);
16377 index = parts.index;
16382 - esp as the base always wants an index,
16383 - ebp as the base always wants a displacement. */
16385 /* Register Indirect. */
16386 if (base && !index && !disp)
16388 /* esp (for its index) and ebp (for its displacement) need
16389 the two-byte modrm form. */
16390 if (addr == stack_pointer_rtx
16391 || addr == arg_pointer_rtx
16392 || addr == frame_pointer_rtx
16393 || addr == hard_frame_pointer_rtx)
16397 /* Direct Addressing. */
16398 else if (disp && !base && !index)
16403 /* Find the length of the displacement constant. */
/* Constraint K matches 8-bit signed immediates, i.e. a disp8.  */
16406 if (base && satisfies_constraint_K (disp))
16411 /* ebp always wants a displacement. */
16412 else if (base == hard_frame_pointer_rtx)
16415 /* An index requires the two-byte modrm form.... */
16417 /* ...like esp, which always wants an index. */
16418 || base == stack_pointer_rtx
16419 || base == arg_pointer_rtx
16420 || base == frame_pointer_rtx)
16427 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16428    is set, expect that insn have 8bit immediate alternative. */
16430 ix86_attr_length_immediate_default (rtx insn, int shortform)
16434 extract_insn_cached (insn);
/* Scan all operands; only constant operands contribute immediate bytes.  */
16435 for (i = recog_data.n_operands - 1; i >= 0; --i)
16436 if (CONSTANT_P (recog_data.operand[i]))
/* Constraint K matches 8-bit signed immediates: short-form encoding.  */
16439 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
/* Otherwise the immediate size follows the insn's mode attribute.  */
16443 switch (get_attr_mode (insn))
16454 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16459 fatal_insn ("unknown insn mode", insn);
16465 /* Compute default value for "length_address" attribute. */
16467 ix86_attr_length_address_default (rtx insn)
/* LEA's "address" is its SET_SRC rather than a MEM operand.  */
16471 if (get_attr_type (insn) == TYPE_LEA)
16473 rtx set = PATTERN (insn);
/* Some LEA patterns are PARALLELs (e.g. with a clobber); the SET is
   element 0.  */
16475 if (GET_CODE (set) == PARALLEL)
16476 set = XVECEXP (set, 0, 0);
16478 gcc_assert (GET_CODE (set) == SET);
16480 return memory_address_length (SET_SRC (set));
/* Otherwise measure the first MEM operand's address (an x86 insn has at
   most one memory operand).  */
16483 extract_insn_cached (insn);
16484 for (i = recog_data.n_operands - 1; i >= 0; --i)
16485 if (MEM_P (recog_data.operand[i]))
16487 return memory_address_length (XEXP (recog_data.operand[i], 0));
16493 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): the `switch (ix86_tune)` line, the per-case return values
   and the default case are elided from this view of the file.  */
16496 ix86_issue_rate (void)
16500 case PROCESSOR_PENTIUM:
16504 case PROCESSOR_PENTIUMPRO:
16505 case PROCESSOR_PENTIUM4:
16506 case PROCESSOR_ATHLON:
16508 case PROCESSOR_AMDFAM10:
16509 case PROCESSOR_NOCONA:
16510 case PROCESSOR_GENERIC32:
16511 case PROCESSOR_GENERIC64:
16514 case PROCESSOR_CORE2:
16522 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16523 by DEP_INSN and nothing set by DEP_INSN. */
16526 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16530 /* Simplify the test for uninteresting insns. */
16531 if (insn_type != TYPE_SETCC
16532 && insn_type != TYPE_ICMOV
16533 && insn_type != TYPE_FCMOV
16534 && insn_type != TYPE_IBR)
16537 if ((set = single_set (dep_insn)) != 0)
16539 set = SET_DEST (set);
16542 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16543 && XVECLEN (PATTERN (dep_insn), 0) == 2
16544 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16545 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16547 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16548 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16553 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16556 /* This test is true if the dependent insn reads the flags but
16557 not any other potentially set register. */
16558 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16561 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16567 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16568    address with operands set by DEP_INSN. */
16571 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* For LEA the "address" is the SET_SRC of the pattern itself.  */
16575 if (insn_type == TYPE_LEA
16578 addr = PATTERN (insn);
/* Some LEA patterns are PARALLELs; the SET is element 0.  */
16580 if (GET_CODE (addr) == PARALLEL)
16581 addr = XVECEXP (addr, 0, 0);
16583 gcc_assert (GET_CODE (addr) == SET);
16585 addr = SET_SRC (addr);
/* Otherwise find the (single) MEM operand's address.  */
16590 extract_insn_cached (insn);
16591 for (i = recog_data.n_operands - 1; i >= 0; --i)
16592 if (MEM_P (recog_data.operand[i]))
16594 addr = XEXP (recog_data.operand[i], 0);
/* AGI exists iff DEP_INSN writes something the address reads.  */
16601 return modified_in_p (addr, dep_insn);
/* Scheduler hook (TARGET_SCHED_ADJUST_COST): adjust COST of the dependence
   LINK between INSN and DEP_INSN according to ix86_tune.
   NOTE(review): brace lines, several `return`/`cost = ` statements and the
   enclosing `switch (ix86_tune)` tail are elided from this view.  */
16605 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16607 enum attr_type insn_type, dep_insn_type;
16608 enum attr_memory memory;
16610 int dep_insn_code_number;
16612 /* Anti and output dependencies have zero cost on all CPUs. */
16613 if (REG_NOTE_KIND (link) != 0)
16616 dep_insn_code_number = recog_memoized (dep_insn);
16618 /* If we can't recognize the insns, we can't really do anything. */
16619 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16622 insn_type = get_attr_type (insn);
16623 dep_insn_type = get_attr_type (dep_insn);
16627 case PROCESSOR_PENTIUM:
16628 /* Address Generation Interlock adds a cycle of latency. */
16629 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16632 /* ??? Compares pair with jump/setcc. */
16633 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16636 /* Floating point stores require value to be ready one cycle earlier. */
16637 if (insn_type == TYPE_FMOV
16638 && get_attr_memory (insn) == MEMORY_STORE
16639 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16643 case PROCESSOR_PENTIUMPRO:
16644 memory = get_attr_memory (insn);
16646 /* INT->FP conversion is expensive. */
16647 if (get_attr_fp_int_src (dep_insn))
16650 /* There is one cycle extra latency between an FP op and a store. */
16651 if (insn_type == TYPE_FMOV
16652 && (set = single_set (dep_insn)) != NULL_RTX
16653 && (set2 = single_set (insn)) != NULL_RTX
16654 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16655 && MEM_P (SET_DEST (set2)))
16658 /* Show ability of reorder buffer to hide latency of load by executing
16659 in parallel with previous instruction in case
16660 previous instruction is not needed to compute the address. */
16661 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16662 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16664 /* Claim moves to take one cycle, as core can issue one load
16665 at time and the next load can start cycle later. */
16666 if (dep_insn_type == TYPE_IMOV
16667 || dep_insn_type == TYPE_FMOV)
16675 memory = get_attr_memory (insn);
16677 /* The esp dependency is resolved before the instruction is really
16679 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16680 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)
16683 /* INT->FP conversion is expensive. */
16684 if (get_attr_fp_int_src (dep_insn))
16687 /* Show ability of reorder buffer to hide latency of load by executing
16688 in parallel with previous instruction in case
16689 previous instruction is not needed to compute the address. */
16690 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16691 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16693 /* Claim moves to take one cycle, as core can issue one load
16694 at time and the next load can start cycle later. */
16695 if (dep_insn_type == TYPE_IMOV
16696 || dep_insn_type == TYPE_FMOV)
16705 case PROCESSOR_ATHLON:
16707 case PROCESSOR_AMDFAM10:
16708 case PROCESSOR_GENERIC32:
16709 case PROCESSOR_GENERIC64:
16710 memory = get_attr_memory (insn);
16712 /* Show ability of reorder buffer to hide latency of load by executing
16713 in parallel with previous instruction in case
16714 previous instruction is not needed to compute the address. */
16715 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16716 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16718 enum attr_unit unit = get_attr_unit (insn);
16721 /* Because of the difference between the length of integer and
16722 floating unit pipeline preparation stages, the memory operands
16723 for floating point are cheaper.
16725 ??? For Athlon the difference is most probably 2. */
16726 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16729 loadcost = TARGET_ATHLON ? 2 : 0;
/* Never let a dependence cost go negative.  */
16731 if (cost >= loadcost)
16744 /* How many alternative schedules to try. This should be as wide as the
16745    scheduling freedom in the DFA, but no wider. Making this value too
16746    large results in extra work for the scheduler. */
/* NOTE(review): the `switch (ix86_tune)` line, per-case return values and
   the default case are elided from this view of the file.  */
16749 ia32_multipass_dfa_lookahead (void)
16753 case PROCESSOR_PENTIUM:
16756 case PROCESSOR_PENTIUMPRO:
16766 /* Compute the alignment given to a constant that is being placed in memory.
16767    EXP is the constant and ALIGN is the alignment that the object would
16769    The value of this function is used instead of that alignment to align
16773 ix86_constant_alignment (tree exp, int align)
/* Bump doubles to 64-bit and 128-bit-preferring modes to 128-bit
   alignment for faster FP/SSE access.  */
16775 if (TREE_CODE (exp) == REAL_CST)
16777 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16779 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Word-align longer string constants (speeds up block copies), but not
   when optimizing for size.  */
16782 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16783 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16784 return BITS_PER_WORD;
16789 /* Compute the alignment for a static variable.
16790    TYPE is the data type, and ALIGN is the alignment that
16791    the object would ordinarily have. The value of this function is used
16792    instead of that alignment to align the object. */
16795 ix86_data_alignment (tree type, int align)
/* Cap the boost at word size when optimizing for size, otherwise at 256
   bits (or the object-file maximum, if smaller).  */
16797 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates with known constant size get MAX_ALIGN.  The
   TREE_INT_CST_HIGH test catches sizes too big for the low word.  */
16799 if (AGGREGATE_TYPE_P (type)
16800 && TYPE_SIZE (type)
16801 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16802 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16803 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16804 && align < max_align)
16807 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16808    to 16byte boundary. */
16811 if (AGGREGATE_TYPE_P (type)
16812 && TYPE_SIZE (type)
16813 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16814 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16815 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind boosts: element/field modes that benefit from 64- or 128-bit
   alignment.  */
16819 if (TREE_CODE (type) == ARRAY_TYPE)
16821 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16823 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16826 else if (TREE_CODE (type) == COMPLEX_TYPE)
16829 if (TYPE_MODE (type) == DCmode && align < 64)
16831 if (TYPE_MODE (type) == XCmode && align < 128)
16834 else if ((TREE_CODE (type) == RECORD_TYPE
16835 || TREE_CODE (type) == UNION_TYPE
16836 || TREE_CODE (type) == QUAL_UNION_TYPE)
16837 && TYPE_FIELDS (type))
16839 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16841 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16844 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16845 || TREE_CODE (type) == INTEGER_TYPE)
16847 if (TYPE_MODE (type) == DFmode && align < 64)
16849 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16856 /* Compute the alignment for a local variable.
16857    TYPE is the data type, and ALIGN is the alignment that
16858    the object would ordinarily have. The value of this macro is used
16859    instead of that alignment to align the object. */
/* Same shape as ix86_data_alignment, but for stack locals; the aggregate
   threshold here is 16 *bytes* (vs. the 128-bit check that follows).  */
16862 ix86_local_alignment (tree type, int align)
16864 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16865    to 16byte boundary. */
16868 if (AGGREGATE_TYPE_P (type)
16869 && TYPE_SIZE (type)
16870 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16871 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16872 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind boosts, mirroring ix86_data_alignment.  */
16875 if (TREE_CODE (type) == ARRAY_TYPE)
16877 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16879 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16882 else if (TREE_CODE (type) == COMPLEX_TYPE)
16884 if (TYPE_MODE (type) == DCmode && align < 64)
16886 if (TYPE_MODE (type) == XCmode && align < 128)
16889 else if ((TREE_CODE (type) == RECORD_TYPE
16890 || TREE_CODE (type) == UNION_TYPE
16891 || TREE_CODE (type) == QUAL_UNION_TYPE)
16892 && TYPE_FIELDS (type))
16894 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16896 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16899 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16900 || TREE_CODE (type) == INTEGER_TYPE)
16903 if (TYPE_MODE (type) == DFmode && align < 64)
16905 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16911 /* Emit RTL insns to initialize the variable parts of a trampoline.
16912    FNADDR is an RTX for the address of the function's pure code.
16913    CXT is an RTX for the static chain value for the function. */
16915 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit trampoline: mov $cxt, %ecx; jmp rel32 to FNADDR.  */
16919 /* Compute offset from the end of the jmp to the target function. */
16920 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16921 plus_constant (tramp, 10),
16922 NULL_RTX, 1, OPTAB_DIRECT);
/* 0xb9 = mov $imm32, %ecx.  */
16923 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16924 gen_int_mode (0xb9, QImode));
16925 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
/* 0xe9 = jmp rel32.  */
16926 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16927 gen_int_mode (0xe9, QImode));
16928 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit trampoline: load FNADDR into %r11, CXT into %r10, jmp *%r11.
   Byte pairs below are stored little-endian, so 0xbb41 emits 0x41 0xbb.  */
16933 /* Try to load address using shorter movl instead of movabs.
16934    We may want to support movq for kernel mode, but kernel does not use
16935    trampolines at the moment. */
16936 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16938 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* 0x41 0xbb = movl $imm32, %r11d (zero-extends into %r11).  */
16939 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16940 gen_int_mode (0xbb41, HImode));
16941 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16942 gen_lowpart (SImode, fnaddr));
/* 0x49 0xbb = movabs $imm64, %r11.  */
16947 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16948 gen_int_mode (0xbb49, HImode));
16949 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16953 /* Load static chain using movabs to r10. */
16954 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16955 gen_int_mode (0xba49, HImode));
16956 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16959 /* Jump to the r11 */
/* 0x49 0xff 0xe3 = jmp *%r11.  */
16960 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16961 gen_int_mode (0xff49, HImode));
16962 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16963 gen_int_mode (0xe3, QImode));
/* Ensure the emitted code fits the reserved trampoline area.  */
16965 gcc_assert (offset <= TRAMPOLINE_SIZE);
16968 #ifdef ENABLE_EXECUTE_STACK
/* Some targets require marking the trampoline's stack page executable.  */
16969 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16970 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16974 /* Codes for all the SSE/MMX builtins. */
16977 IX86_BUILTIN_ADDPS,
16978 IX86_BUILTIN_ADDSS,
16979 IX86_BUILTIN_DIVPS,
16980 IX86_BUILTIN_DIVSS,
16981 IX86_BUILTIN_MULPS,
16982 IX86_BUILTIN_MULSS,
16983 IX86_BUILTIN_SUBPS,
16984 IX86_BUILTIN_SUBSS,
16986 IX86_BUILTIN_CMPEQPS,
16987 IX86_BUILTIN_CMPLTPS,
16988 IX86_BUILTIN_CMPLEPS,
16989 IX86_BUILTIN_CMPGTPS,
16990 IX86_BUILTIN_CMPGEPS,
16991 IX86_BUILTIN_CMPNEQPS,
16992 IX86_BUILTIN_CMPNLTPS,
16993 IX86_BUILTIN_CMPNLEPS,
16994 IX86_BUILTIN_CMPNGTPS,
16995 IX86_BUILTIN_CMPNGEPS,
16996 IX86_BUILTIN_CMPORDPS,
16997 IX86_BUILTIN_CMPUNORDPS,
16998 IX86_BUILTIN_CMPEQSS,
16999 IX86_BUILTIN_CMPLTSS,
17000 IX86_BUILTIN_CMPLESS,
17001 IX86_BUILTIN_CMPNEQSS,
17002 IX86_BUILTIN_CMPNLTSS,
17003 IX86_BUILTIN_CMPNLESS,
17004 IX86_BUILTIN_CMPNGTSS,
17005 IX86_BUILTIN_CMPNGESS,
17006 IX86_BUILTIN_CMPORDSS,
17007 IX86_BUILTIN_CMPUNORDSS,
17009 IX86_BUILTIN_COMIEQSS,
17010 IX86_BUILTIN_COMILTSS,
17011 IX86_BUILTIN_COMILESS,
17012 IX86_BUILTIN_COMIGTSS,
17013 IX86_BUILTIN_COMIGESS,
17014 IX86_BUILTIN_COMINEQSS,
17015 IX86_BUILTIN_UCOMIEQSS,
17016 IX86_BUILTIN_UCOMILTSS,
17017 IX86_BUILTIN_UCOMILESS,
17018 IX86_BUILTIN_UCOMIGTSS,
17019 IX86_BUILTIN_UCOMIGESS,
17020 IX86_BUILTIN_UCOMINEQSS,
17022 IX86_BUILTIN_CVTPI2PS,
17023 IX86_BUILTIN_CVTPS2PI,
17024 IX86_BUILTIN_CVTSI2SS,
17025 IX86_BUILTIN_CVTSI642SS,
17026 IX86_BUILTIN_CVTSS2SI,
17027 IX86_BUILTIN_CVTSS2SI64,
17028 IX86_BUILTIN_CVTTPS2PI,
17029 IX86_BUILTIN_CVTTSS2SI,
17030 IX86_BUILTIN_CVTTSS2SI64,
17032 IX86_BUILTIN_MAXPS,
17033 IX86_BUILTIN_MAXSS,
17034 IX86_BUILTIN_MINPS,
17035 IX86_BUILTIN_MINSS,
17037 IX86_BUILTIN_LOADUPS,
17038 IX86_BUILTIN_STOREUPS,
17039 IX86_BUILTIN_MOVSS,
17041 IX86_BUILTIN_MOVHLPS,
17042 IX86_BUILTIN_MOVLHPS,
17043 IX86_BUILTIN_LOADHPS,
17044 IX86_BUILTIN_LOADLPS,
17045 IX86_BUILTIN_STOREHPS,
17046 IX86_BUILTIN_STORELPS,
17048 IX86_BUILTIN_MASKMOVQ,
17049 IX86_BUILTIN_MOVMSKPS,
17050 IX86_BUILTIN_PMOVMSKB,
17052 IX86_BUILTIN_MOVNTPS,
17053 IX86_BUILTIN_MOVNTQ,
17055 IX86_BUILTIN_LOADDQU,
17056 IX86_BUILTIN_STOREDQU,
17058 IX86_BUILTIN_PACKSSWB,
17059 IX86_BUILTIN_PACKSSDW,
17060 IX86_BUILTIN_PACKUSWB,
17062 IX86_BUILTIN_PADDB,
17063 IX86_BUILTIN_PADDW,
17064 IX86_BUILTIN_PADDD,
17065 IX86_BUILTIN_PADDQ,
17066 IX86_BUILTIN_PADDSB,
17067 IX86_BUILTIN_PADDSW,
17068 IX86_BUILTIN_PADDUSB,
17069 IX86_BUILTIN_PADDUSW,
17070 IX86_BUILTIN_PSUBB,
17071 IX86_BUILTIN_PSUBW,
17072 IX86_BUILTIN_PSUBD,
17073 IX86_BUILTIN_PSUBQ,
17074 IX86_BUILTIN_PSUBSB,
17075 IX86_BUILTIN_PSUBSW,
17076 IX86_BUILTIN_PSUBUSB,
17077 IX86_BUILTIN_PSUBUSW,
17080 IX86_BUILTIN_PANDN,
17084 IX86_BUILTIN_PAVGB,
17085 IX86_BUILTIN_PAVGW,
17087 IX86_BUILTIN_PCMPEQB,
17088 IX86_BUILTIN_PCMPEQW,
17089 IX86_BUILTIN_PCMPEQD,
17090 IX86_BUILTIN_PCMPGTB,
17091 IX86_BUILTIN_PCMPGTW,
17092 IX86_BUILTIN_PCMPGTD,
17094 IX86_BUILTIN_PMADDWD,
17096 IX86_BUILTIN_PMAXSW,
17097 IX86_BUILTIN_PMAXUB,
17098 IX86_BUILTIN_PMINSW,
17099 IX86_BUILTIN_PMINUB,
17101 IX86_BUILTIN_PMULHUW,
17102 IX86_BUILTIN_PMULHW,
17103 IX86_BUILTIN_PMULLW,
17105 IX86_BUILTIN_PSADBW,
17106 IX86_BUILTIN_PSHUFW,
17108 IX86_BUILTIN_PSLLW,
17109 IX86_BUILTIN_PSLLD,
17110 IX86_BUILTIN_PSLLQ,
17111 IX86_BUILTIN_PSRAW,
17112 IX86_BUILTIN_PSRAD,
17113 IX86_BUILTIN_PSRLW,
17114 IX86_BUILTIN_PSRLD,
17115 IX86_BUILTIN_PSRLQ,
17116 IX86_BUILTIN_PSLLWI,
17117 IX86_BUILTIN_PSLLDI,
17118 IX86_BUILTIN_PSLLQI,
17119 IX86_BUILTIN_PSRAWI,
17120 IX86_BUILTIN_PSRADI,
17121 IX86_BUILTIN_PSRLWI,
17122 IX86_BUILTIN_PSRLDI,
17123 IX86_BUILTIN_PSRLQI,
17125 IX86_BUILTIN_PUNPCKHBW,
17126 IX86_BUILTIN_PUNPCKHWD,
17127 IX86_BUILTIN_PUNPCKHDQ,
17128 IX86_BUILTIN_PUNPCKLBW,
17129 IX86_BUILTIN_PUNPCKLWD,
17130 IX86_BUILTIN_PUNPCKLDQ,
17132 IX86_BUILTIN_SHUFPS,
17134 IX86_BUILTIN_RCPPS,
17135 IX86_BUILTIN_RCPSS,
17136 IX86_BUILTIN_RSQRTPS,
17137 IX86_BUILTIN_RSQRTPS_NR,
17138 IX86_BUILTIN_RSQRTSS,
17139 IX86_BUILTIN_RSQRTF,
17140 IX86_BUILTIN_SQRTPS,
17141 IX86_BUILTIN_SQRTPS_NR,
17142 IX86_BUILTIN_SQRTSS,
17144 IX86_BUILTIN_UNPCKHPS,
17145 IX86_BUILTIN_UNPCKLPS,
17147 IX86_BUILTIN_ANDPS,
17148 IX86_BUILTIN_ANDNPS,
17150 IX86_BUILTIN_XORPS,
17153 IX86_BUILTIN_LDMXCSR,
17154 IX86_BUILTIN_STMXCSR,
17155 IX86_BUILTIN_SFENCE,
17157 /* 3DNow! Original */
17158 IX86_BUILTIN_FEMMS,
17159 IX86_BUILTIN_PAVGUSB,
17160 IX86_BUILTIN_PF2ID,
17161 IX86_BUILTIN_PFACC,
17162 IX86_BUILTIN_PFADD,
17163 IX86_BUILTIN_PFCMPEQ,
17164 IX86_BUILTIN_PFCMPGE,
17165 IX86_BUILTIN_PFCMPGT,
17166 IX86_BUILTIN_PFMAX,
17167 IX86_BUILTIN_PFMIN,
17168 IX86_BUILTIN_PFMUL,
17169 IX86_BUILTIN_PFRCP,
17170 IX86_BUILTIN_PFRCPIT1,
17171 IX86_BUILTIN_PFRCPIT2,
17172 IX86_BUILTIN_PFRSQIT1,
17173 IX86_BUILTIN_PFRSQRT,
17174 IX86_BUILTIN_PFSUB,
17175 IX86_BUILTIN_PFSUBR,
17176 IX86_BUILTIN_PI2FD,
17177 IX86_BUILTIN_PMULHRW,
17179 /* 3DNow! Athlon Extensions */
17180 IX86_BUILTIN_PF2IW,
17181 IX86_BUILTIN_PFNACC,
17182 IX86_BUILTIN_PFPNACC,
17183 IX86_BUILTIN_PI2FW,
17184 IX86_BUILTIN_PSWAPDSI,
17185 IX86_BUILTIN_PSWAPDSF,
17188 IX86_BUILTIN_ADDPD,
17189 IX86_BUILTIN_ADDSD,
17190 IX86_BUILTIN_DIVPD,
17191 IX86_BUILTIN_DIVSD,
17192 IX86_BUILTIN_MULPD,
17193 IX86_BUILTIN_MULSD,
17194 IX86_BUILTIN_SUBPD,
17195 IX86_BUILTIN_SUBSD,
17197 IX86_BUILTIN_CMPEQPD,
17198 IX86_BUILTIN_CMPLTPD,
17199 IX86_BUILTIN_CMPLEPD,
17200 IX86_BUILTIN_CMPGTPD,
17201 IX86_BUILTIN_CMPGEPD,
17202 IX86_BUILTIN_CMPNEQPD,
17203 IX86_BUILTIN_CMPNLTPD,
17204 IX86_BUILTIN_CMPNLEPD,
17205 IX86_BUILTIN_CMPNGTPD,
17206 IX86_BUILTIN_CMPNGEPD,
17207 IX86_BUILTIN_CMPORDPD,
17208 IX86_BUILTIN_CMPUNORDPD,
17209 IX86_BUILTIN_CMPEQSD,
17210 IX86_BUILTIN_CMPLTSD,
17211 IX86_BUILTIN_CMPLESD,
17212 IX86_BUILTIN_CMPNEQSD,
17213 IX86_BUILTIN_CMPNLTSD,
17214 IX86_BUILTIN_CMPNLESD,
17215 IX86_BUILTIN_CMPORDSD,
17216 IX86_BUILTIN_CMPUNORDSD,
17218 IX86_BUILTIN_COMIEQSD,
17219 IX86_BUILTIN_COMILTSD,
17220 IX86_BUILTIN_COMILESD,
17221 IX86_BUILTIN_COMIGTSD,
17222 IX86_BUILTIN_COMIGESD,
17223 IX86_BUILTIN_COMINEQSD,
17224 IX86_BUILTIN_UCOMIEQSD,
17225 IX86_BUILTIN_UCOMILTSD,
17226 IX86_BUILTIN_UCOMILESD,
17227 IX86_BUILTIN_UCOMIGTSD,
17228 IX86_BUILTIN_UCOMIGESD,
17229 IX86_BUILTIN_UCOMINEQSD,
17231 IX86_BUILTIN_MAXPD,
17232 IX86_BUILTIN_MAXSD,
17233 IX86_BUILTIN_MINPD,
17234 IX86_BUILTIN_MINSD,
17236 IX86_BUILTIN_ANDPD,
17237 IX86_BUILTIN_ANDNPD,
17239 IX86_BUILTIN_XORPD,
17241 IX86_BUILTIN_SQRTPD,
17242 IX86_BUILTIN_SQRTSD,
17244 IX86_BUILTIN_UNPCKHPD,
17245 IX86_BUILTIN_UNPCKLPD,
17247 IX86_BUILTIN_SHUFPD,
17249 IX86_BUILTIN_LOADUPD,
17250 IX86_BUILTIN_STOREUPD,
17251 IX86_BUILTIN_MOVSD,
17253 IX86_BUILTIN_LOADHPD,
17254 IX86_BUILTIN_LOADLPD,
17256 IX86_BUILTIN_CVTDQ2PD,
17257 IX86_BUILTIN_CVTDQ2PS,
17259 IX86_BUILTIN_CVTPD2DQ,
17260 IX86_BUILTIN_CVTPD2PI,
17261 IX86_BUILTIN_CVTPD2PS,
17262 IX86_BUILTIN_CVTTPD2DQ,
17263 IX86_BUILTIN_CVTTPD2PI,
17265 IX86_BUILTIN_CVTPI2PD,
17266 IX86_BUILTIN_CVTSI2SD,
17267 IX86_BUILTIN_CVTSI642SD,
17269 IX86_BUILTIN_CVTSD2SI,
17270 IX86_BUILTIN_CVTSD2SI64,
17271 IX86_BUILTIN_CVTSD2SS,
17272 IX86_BUILTIN_CVTSS2SD,
17273 IX86_BUILTIN_CVTTSD2SI,
17274 IX86_BUILTIN_CVTTSD2SI64,
17276 IX86_BUILTIN_CVTPS2DQ,
17277 IX86_BUILTIN_CVTPS2PD,
17278 IX86_BUILTIN_CVTTPS2DQ,
17280 IX86_BUILTIN_MOVNTI,
17281 IX86_BUILTIN_MOVNTPD,
17282 IX86_BUILTIN_MOVNTDQ,
17285 IX86_BUILTIN_MASKMOVDQU,
17286 IX86_BUILTIN_MOVMSKPD,
17287 IX86_BUILTIN_PMOVMSKB128,
17289 IX86_BUILTIN_PACKSSWB128,
17290 IX86_BUILTIN_PACKSSDW128,
17291 IX86_BUILTIN_PACKUSWB128,
17293 IX86_BUILTIN_PADDB128,
17294 IX86_BUILTIN_PADDW128,
17295 IX86_BUILTIN_PADDD128,
17296 IX86_BUILTIN_PADDQ128,
17297 IX86_BUILTIN_PADDSB128,
17298 IX86_BUILTIN_PADDSW128,
17299 IX86_BUILTIN_PADDUSB128,
17300 IX86_BUILTIN_PADDUSW128,
17301 IX86_BUILTIN_PSUBB128,
17302 IX86_BUILTIN_PSUBW128,
17303 IX86_BUILTIN_PSUBD128,
17304 IX86_BUILTIN_PSUBQ128,
17305 IX86_BUILTIN_PSUBSB128,
17306 IX86_BUILTIN_PSUBSW128,
17307 IX86_BUILTIN_PSUBUSB128,
17308 IX86_BUILTIN_PSUBUSW128,
17310 IX86_BUILTIN_PAND128,
17311 IX86_BUILTIN_PANDN128,
17312 IX86_BUILTIN_POR128,
17313 IX86_BUILTIN_PXOR128,
17315 IX86_BUILTIN_PAVGB128,
17316 IX86_BUILTIN_PAVGW128,
17318 IX86_BUILTIN_PCMPEQB128,
17319 IX86_BUILTIN_PCMPEQW128,
17320 IX86_BUILTIN_PCMPEQD128,
17321 IX86_BUILTIN_PCMPGTB128,
17322 IX86_BUILTIN_PCMPGTW128,
17323 IX86_BUILTIN_PCMPGTD128,
17325 IX86_BUILTIN_PMADDWD128,
17327 IX86_BUILTIN_PMAXSW128,
17328 IX86_BUILTIN_PMAXUB128,
17329 IX86_BUILTIN_PMINSW128,
17330 IX86_BUILTIN_PMINUB128,
17332 IX86_BUILTIN_PMULUDQ,
17333 IX86_BUILTIN_PMULUDQ128,
17334 IX86_BUILTIN_PMULHUW128,
17335 IX86_BUILTIN_PMULHW128,
17336 IX86_BUILTIN_PMULLW128,
17338 IX86_BUILTIN_PSADBW128,
17339 IX86_BUILTIN_PSHUFHW,
17340 IX86_BUILTIN_PSHUFLW,
17341 IX86_BUILTIN_PSHUFD,
17343 IX86_BUILTIN_PSLLDQI128,
17344 IX86_BUILTIN_PSLLWI128,
17345 IX86_BUILTIN_PSLLDI128,
17346 IX86_BUILTIN_PSLLQI128,
17347 IX86_BUILTIN_PSRAWI128,
17348 IX86_BUILTIN_PSRADI128,
17349 IX86_BUILTIN_PSRLDQI128,
17350 IX86_BUILTIN_PSRLWI128,
17351 IX86_BUILTIN_PSRLDI128,
17352 IX86_BUILTIN_PSRLQI128,
17354 IX86_BUILTIN_PSLLDQ128,
17355 IX86_BUILTIN_PSLLW128,
17356 IX86_BUILTIN_PSLLD128,
17357 IX86_BUILTIN_PSLLQ128,
17358 IX86_BUILTIN_PSRAW128,
17359 IX86_BUILTIN_PSRAD128,
17360 IX86_BUILTIN_PSRLW128,
17361 IX86_BUILTIN_PSRLD128,
17362 IX86_BUILTIN_PSRLQ128,
17364 IX86_BUILTIN_PUNPCKHBW128,
17365 IX86_BUILTIN_PUNPCKHWD128,
17366 IX86_BUILTIN_PUNPCKHDQ128,
17367 IX86_BUILTIN_PUNPCKHQDQ128,
17368 IX86_BUILTIN_PUNPCKLBW128,
17369 IX86_BUILTIN_PUNPCKLWD128,
17370 IX86_BUILTIN_PUNPCKLDQ128,
17371 IX86_BUILTIN_PUNPCKLQDQ128,
17373 IX86_BUILTIN_CLFLUSH,
17374 IX86_BUILTIN_MFENCE,
17375 IX86_BUILTIN_LFENCE,
17377 /* Prescott New Instructions. */
17378 IX86_BUILTIN_ADDSUBPS,
17379 IX86_BUILTIN_HADDPS,
17380 IX86_BUILTIN_HSUBPS,
17381 IX86_BUILTIN_MOVSHDUP,
17382 IX86_BUILTIN_MOVSLDUP,
17383 IX86_BUILTIN_ADDSUBPD,
17384 IX86_BUILTIN_HADDPD,
17385 IX86_BUILTIN_HSUBPD,
17386 IX86_BUILTIN_LDDQU,
17388 IX86_BUILTIN_MONITOR,
17389 IX86_BUILTIN_MWAIT,
17392 IX86_BUILTIN_PHADDW,
17393 IX86_BUILTIN_PHADDD,
17394 IX86_BUILTIN_PHADDSW,
17395 IX86_BUILTIN_PHSUBW,
17396 IX86_BUILTIN_PHSUBD,
17397 IX86_BUILTIN_PHSUBSW,
17398 IX86_BUILTIN_PMADDUBSW,
17399 IX86_BUILTIN_PMULHRSW,
17400 IX86_BUILTIN_PSHUFB,
17401 IX86_BUILTIN_PSIGNB,
17402 IX86_BUILTIN_PSIGNW,
17403 IX86_BUILTIN_PSIGND,
17404 IX86_BUILTIN_PALIGNR,
17405 IX86_BUILTIN_PABSB,
17406 IX86_BUILTIN_PABSW,
17407 IX86_BUILTIN_PABSD,
17409 IX86_BUILTIN_PHADDW128,
17410 IX86_BUILTIN_PHADDD128,
17411 IX86_BUILTIN_PHADDSW128,
17412 IX86_BUILTIN_PHSUBW128,
17413 IX86_BUILTIN_PHSUBD128,
17414 IX86_BUILTIN_PHSUBSW128,
17415 IX86_BUILTIN_PMADDUBSW128,
17416 IX86_BUILTIN_PMULHRSW128,
17417 IX86_BUILTIN_PSHUFB128,
17418 IX86_BUILTIN_PSIGNB128,
17419 IX86_BUILTIN_PSIGNW128,
17420 IX86_BUILTIN_PSIGND128,
17421 IX86_BUILTIN_PALIGNR128,
17422 IX86_BUILTIN_PABSB128,
17423 IX86_BUILTIN_PABSW128,
17424 IX86_BUILTIN_PABSD128,
17426 /* AMDFAM10 - SSE4A New Instructions. */
17427 IX86_BUILTIN_MOVNTSD,
17428 IX86_BUILTIN_MOVNTSS,
17429 IX86_BUILTIN_EXTRQI,
17430 IX86_BUILTIN_EXTRQ,
17431 IX86_BUILTIN_INSERTQI,
17432 IX86_BUILTIN_INSERTQ,
17435 IX86_BUILTIN_BLENDPD,
17436 IX86_BUILTIN_BLENDPS,
17437 IX86_BUILTIN_BLENDVPD,
17438 IX86_BUILTIN_BLENDVPS,
17439 IX86_BUILTIN_PBLENDVB128,
17440 IX86_BUILTIN_PBLENDW128,
17445 IX86_BUILTIN_INSERTPS128,
17447 IX86_BUILTIN_MOVNTDQA,
17448 IX86_BUILTIN_MPSADBW128,
17449 IX86_BUILTIN_PACKUSDW128,
17450 IX86_BUILTIN_PCMPEQQ,
17451 IX86_BUILTIN_PHMINPOSUW128,
17453 IX86_BUILTIN_PMAXSB128,
17454 IX86_BUILTIN_PMAXSD128,
17455 IX86_BUILTIN_PMAXUD128,
17456 IX86_BUILTIN_PMAXUW128,
17458 IX86_BUILTIN_PMINSB128,
17459 IX86_BUILTIN_PMINSD128,
17460 IX86_BUILTIN_PMINUD128,
17461 IX86_BUILTIN_PMINUW128,
17463 IX86_BUILTIN_PMOVSXBW128,
17464 IX86_BUILTIN_PMOVSXBD128,
17465 IX86_BUILTIN_PMOVSXBQ128,
17466 IX86_BUILTIN_PMOVSXWD128,
17467 IX86_BUILTIN_PMOVSXWQ128,
17468 IX86_BUILTIN_PMOVSXDQ128,
17470 IX86_BUILTIN_PMOVZXBW128,
17471 IX86_BUILTIN_PMOVZXBD128,
17472 IX86_BUILTIN_PMOVZXBQ128,
17473 IX86_BUILTIN_PMOVZXWD128,
17474 IX86_BUILTIN_PMOVZXWQ128,
17475 IX86_BUILTIN_PMOVZXDQ128,
17477 IX86_BUILTIN_PMULDQ128,
17478 IX86_BUILTIN_PMULLD128,
17480 IX86_BUILTIN_ROUNDPD,
17481 IX86_BUILTIN_ROUNDPS,
17482 IX86_BUILTIN_ROUNDSD,
17483 IX86_BUILTIN_ROUNDSS,
17485 IX86_BUILTIN_PTESTZ,
17486 IX86_BUILTIN_PTESTC,
17487 IX86_BUILTIN_PTESTNZC,
17489 IX86_BUILTIN_VEC_INIT_V2SI,
17490 IX86_BUILTIN_VEC_INIT_V4HI,
17491 IX86_BUILTIN_VEC_INIT_V8QI,
17492 IX86_BUILTIN_VEC_EXT_V2DF,
17493 IX86_BUILTIN_VEC_EXT_V2DI,
17494 IX86_BUILTIN_VEC_EXT_V4SF,
17495 IX86_BUILTIN_VEC_EXT_V4SI,
17496 IX86_BUILTIN_VEC_EXT_V8HI,
17497 IX86_BUILTIN_VEC_EXT_V2SI,
17498 IX86_BUILTIN_VEC_EXT_V4HI,
17499 IX86_BUILTIN_VEC_EXT_V16QI,
17500 IX86_BUILTIN_VEC_SET_V2DI,
17501 IX86_BUILTIN_VEC_SET_V4SF,
17502 IX86_BUILTIN_VEC_SET_V4SI,
17503 IX86_BUILTIN_VEC_SET_V8HI,
17504 IX86_BUILTIN_VEC_SET_V4HI,
17505 IX86_BUILTIN_VEC_SET_V16QI,
17507 IX86_BUILTIN_VEC_PACK_SFIX,
17510 IX86_BUILTIN_CRC32QI,
17511 IX86_BUILTIN_CRC32HI,
17512 IX86_BUILTIN_CRC32SI,
17513 IX86_BUILTIN_CRC32DI,
17515 IX86_BUILTIN_PCMPESTRI128,
17516 IX86_BUILTIN_PCMPESTRM128,
17517 IX86_BUILTIN_PCMPESTRA128,
17518 IX86_BUILTIN_PCMPESTRC128,
17519 IX86_BUILTIN_PCMPESTRO128,
17520 IX86_BUILTIN_PCMPESTRS128,
17521 IX86_BUILTIN_PCMPESTRZ128,
17522 IX86_BUILTIN_PCMPISTRI128,
17523 IX86_BUILTIN_PCMPISTRM128,
17524 IX86_BUILTIN_PCMPISTRA128,
17525 IX86_BUILTIN_PCMPISTRC128,
17526 IX86_BUILTIN_PCMPISTRO128,
17527 IX86_BUILTIN_PCMPISTRS128,
17528 IX86_BUILTIN_PCMPISTRZ128,
17530 IX86_BUILTIN_PCMPGTQ,
17532 /* TFmode support builtins. */
17534 IX86_BUILTIN_FABSQ,
17535 IX86_BUILTIN_COPYSIGNQ,
17537 /* SSE5 instructions */
17538 IX86_BUILTIN_FMADDSS,
17539 IX86_BUILTIN_FMADDSD,
17540 IX86_BUILTIN_FMADDPS,
17541 IX86_BUILTIN_FMADDPD,
17542 IX86_BUILTIN_FMSUBSS,
17543 IX86_BUILTIN_FMSUBSD,
17544 IX86_BUILTIN_FMSUBPS,
17545 IX86_BUILTIN_FMSUBPD,
17546 IX86_BUILTIN_FNMADDSS,
17547 IX86_BUILTIN_FNMADDSD,
17548 IX86_BUILTIN_FNMADDPS,
17549 IX86_BUILTIN_FNMADDPD,
17550 IX86_BUILTIN_FNMSUBSS,
17551 IX86_BUILTIN_FNMSUBSD,
17552 IX86_BUILTIN_FNMSUBPS,
17553 IX86_BUILTIN_FNMSUBPD,
17554 IX86_BUILTIN_PCMOV_V2DI,
17555 IX86_BUILTIN_PCMOV_V4SI,
17556 IX86_BUILTIN_PCMOV_V8HI,
17557 IX86_BUILTIN_PCMOV_V16QI,
17558 IX86_BUILTIN_PCMOV_V4SF,
17559 IX86_BUILTIN_PCMOV_V2DF,
17560 IX86_BUILTIN_PPERM,
17561 IX86_BUILTIN_PERMPS,
17562 IX86_BUILTIN_PERMPD,
17563 IX86_BUILTIN_PMACSSWW,
17564 IX86_BUILTIN_PMACSWW,
17565 IX86_BUILTIN_PMACSSWD,
17566 IX86_BUILTIN_PMACSWD,
17567 IX86_BUILTIN_PMACSSDD,
17568 IX86_BUILTIN_PMACSDD,
17569 IX86_BUILTIN_PMACSSDQL,
17570 IX86_BUILTIN_PMACSSDQH,
17571 IX86_BUILTIN_PMACSDQL,
17572 IX86_BUILTIN_PMACSDQH,
17573 IX86_BUILTIN_PMADCSSWD,
17574 IX86_BUILTIN_PMADCSWD,
17575 IX86_BUILTIN_PHADDBW,
17576 IX86_BUILTIN_PHADDBD,
17577 IX86_BUILTIN_PHADDBQ,
17578 IX86_BUILTIN_PHADDWD,
17579 IX86_BUILTIN_PHADDWQ,
17580 IX86_BUILTIN_PHADDDQ,
17581 IX86_BUILTIN_PHADDUBW,
17582 IX86_BUILTIN_PHADDUBD,
17583 IX86_BUILTIN_PHADDUBQ,
17584 IX86_BUILTIN_PHADDUWD,
17585 IX86_BUILTIN_PHADDUWQ,
17586 IX86_BUILTIN_PHADDUDQ,
17587 IX86_BUILTIN_PHSUBBW,
17588 IX86_BUILTIN_PHSUBWD,
17589 IX86_BUILTIN_PHSUBDQ,
17590 IX86_BUILTIN_PROTB,
17591 IX86_BUILTIN_PROTW,
17592 IX86_BUILTIN_PROTD,
17593 IX86_BUILTIN_PROTQ,
17594 IX86_BUILTIN_PROTB_IMM,
17595 IX86_BUILTIN_PROTW_IMM,
17596 IX86_BUILTIN_PROTD_IMM,
17597 IX86_BUILTIN_PROTQ_IMM,
17598 IX86_BUILTIN_PSHLB,
17599 IX86_BUILTIN_PSHLW,
17600 IX86_BUILTIN_PSHLD,
17601 IX86_BUILTIN_PSHLQ,
17602 IX86_BUILTIN_PSHAB,
17603 IX86_BUILTIN_PSHAW,
17604 IX86_BUILTIN_PSHAD,
17605 IX86_BUILTIN_PSHAQ,
17606 IX86_BUILTIN_FRCZSS,
17607 IX86_BUILTIN_FRCZSD,
17608 IX86_BUILTIN_FRCZPS,
17609 IX86_BUILTIN_FRCZPD,
17610 IX86_BUILTIN_CVTPH2PS,
17611 IX86_BUILTIN_CVTPS2PH,
17613 IX86_BUILTIN_COMEQSS,
17614 IX86_BUILTIN_COMNESS,
17615 IX86_BUILTIN_COMLTSS,
17616 IX86_BUILTIN_COMLESS,
17617 IX86_BUILTIN_COMGTSS,
17618 IX86_BUILTIN_COMGESS,
17619 IX86_BUILTIN_COMUEQSS,
17620 IX86_BUILTIN_COMUNESS,
17621 IX86_BUILTIN_COMULTSS,
17622 IX86_BUILTIN_COMULESS,
17623 IX86_BUILTIN_COMUGTSS,
17624 IX86_BUILTIN_COMUGESS,
17625 IX86_BUILTIN_COMORDSS,
17626 IX86_BUILTIN_COMUNORDSS,
17627 IX86_BUILTIN_COMFALSESS,
17628 IX86_BUILTIN_COMTRUESS,
17630 IX86_BUILTIN_COMEQSD,
17631 IX86_BUILTIN_COMNESD,
17632 IX86_BUILTIN_COMLTSD,
17633 IX86_BUILTIN_COMLESD,
17634 IX86_BUILTIN_COMGTSD,
17635 IX86_BUILTIN_COMGESD,
17636 IX86_BUILTIN_COMUEQSD,
17637 IX86_BUILTIN_COMUNESD,
17638 IX86_BUILTIN_COMULTSD,
17639 IX86_BUILTIN_COMULESD,
17640 IX86_BUILTIN_COMUGTSD,
17641 IX86_BUILTIN_COMUGESD,
17642 IX86_BUILTIN_COMORDSD,
17643 IX86_BUILTIN_COMUNORDSD,
17644 IX86_BUILTIN_COMFALSESD,
17645 IX86_BUILTIN_COMTRUESD,
17647 IX86_BUILTIN_COMEQPS,
17648 IX86_BUILTIN_COMNEPS,
17649 IX86_BUILTIN_COMLTPS,
17650 IX86_BUILTIN_COMLEPS,
17651 IX86_BUILTIN_COMGTPS,
17652 IX86_BUILTIN_COMGEPS,
17653 IX86_BUILTIN_COMUEQPS,
17654 IX86_BUILTIN_COMUNEPS,
17655 IX86_BUILTIN_COMULTPS,
17656 IX86_BUILTIN_COMULEPS,
17657 IX86_BUILTIN_COMUGTPS,
17658 IX86_BUILTIN_COMUGEPS,
17659 IX86_BUILTIN_COMORDPS,
17660 IX86_BUILTIN_COMUNORDPS,
17661 IX86_BUILTIN_COMFALSEPS,
17662 IX86_BUILTIN_COMTRUEPS,
17664 IX86_BUILTIN_COMEQPD,
17665 IX86_BUILTIN_COMNEPD,
17666 IX86_BUILTIN_COMLTPD,
17667 IX86_BUILTIN_COMLEPD,
17668 IX86_BUILTIN_COMGTPD,
17669 IX86_BUILTIN_COMGEPD,
17670 IX86_BUILTIN_COMUEQPD,
17671 IX86_BUILTIN_COMUNEPD,
17672 IX86_BUILTIN_COMULTPD,
17673 IX86_BUILTIN_COMULEPD,
17674 IX86_BUILTIN_COMUGTPD,
17675 IX86_BUILTIN_COMUGEPD,
17676 IX86_BUILTIN_COMORDPD,
17677 IX86_BUILTIN_COMUNORDPD,
17678 IX86_BUILTIN_COMFALSEPD,
17679 IX86_BUILTIN_COMTRUEPD,
17681 IX86_BUILTIN_PCOMEQUB,
17682 IX86_BUILTIN_PCOMNEUB,
17683 IX86_BUILTIN_PCOMLTUB,
17684 IX86_BUILTIN_PCOMLEUB,
17685 IX86_BUILTIN_PCOMGTUB,
17686 IX86_BUILTIN_PCOMGEUB,
17687 IX86_BUILTIN_PCOMFALSEUB,
17688 IX86_BUILTIN_PCOMTRUEUB,
17689 IX86_BUILTIN_PCOMEQUW,
17690 IX86_BUILTIN_PCOMNEUW,
17691 IX86_BUILTIN_PCOMLTUW,
17692 IX86_BUILTIN_PCOMLEUW,
17693 IX86_BUILTIN_PCOMGTUW,
17694 IX86_BUILTIN_PCOMGEUW,
17695 IX86_BUILTIN_PCOMFALSEUW,
17696 IX86_BUILTIN_PCOMTRUEUW,
17697 IX86_BUILTIN_PCOMEQUD,
17698 IX86_BUILTIN_PCOMNEUD,
17699 IX86_BUILTIN_PCOMLTUD,
17700 IX86_BUILTIN_PCOMLEUD,
17701 IX86_BUILTIN_PCOMGTUD,
17702 IX86_BUILTIN_PCOMGEUD,
17703 IX86_BUILTIN_PCOMFALSEUD,
17704 IX86_BUILTIN_PCOMTRUEUD,
17705 IX86_BUILTIN_PCOMEQUQ,
17706 IX86_BUILTIN_PCOMNEUQ,
17707 IX86_BUILTIN_PCOMLTUQ,
17708 IX86_BUILTIN_PCOMLEUQ,
17709 IX86_BUILTIN_PCOMGTUQ,
17710 IX86_BUILTIN_PCOMGEUQ,
17711 IX86_BUILTIN_PCOMFALSEUQ,
17712 IX86_BUILTIN_PCOMTRUEUQ,
17714 IX86_BUILTIN_PCOMEQB,
17715 IX86_BUILTIN_PCOMNEB,
17716 IX86_BUILTIN_PCOMLTB,
17717 IX86_BUILTIN_PCOMLEB,
17718 IX86_BUILTIN_PCOMGTB,
17719 IX86_BUILTIN_PCOMGEB,
17720 IX86_BUILTIN_PCOMFALSEB,
17721 IX86_BUILTIN_PCOMTRUEB,
17722 IX86_BUILTIN_PCOMEQW,
17723 IX86_BUILTIN_PCOMNEW,
17724 IX86_BUILTIN_PCOMLTW,
17725 IX86_BUILTIN_PCOMLEW,
17726 IX86_BUILTIN_PCOMGTW,
17727 IX86_BUILTIN_PCOMGEW,
17728 IX86_BUILTIN_PCOMFALSEW,
17729 IX86_BUILTIN_PCOMTRUEW,
17730 IX86_BUILTIN_PCOMEQD,
17731 IX86_BUILTIN_PCOMNED,
17732 IX86_BUILTIN_PCOMLTD,
17733 IX86_BUILTIN_PCOMLED,
17734 IX86_BUILTIN_PCOMGTD,
17735 IX86_BUILTIN_PCOMGED,
17736 IX86_BUILTIN_PCOMFALSED,
17737 IX86_BUILTIN_PCOMTRUED,
17738 IX86_BUILTIN_PCOMEQQ,
17739 IX86_BUILTIN_PCOMNEQ,
17740 IX86_BUILTIN_PCOMLTQ,
17741 IX86_BUILTIN_PCOMLEQ,
17742 IX86_BUILTIN_PCOMGTQ,
17743 IX86_BUILTIN_PCOMGEQ,
17744 IX86_BUILTIN_PCOMFALSEQ,
17745 IX86_BUILTIN_PCOMTRUEQ,
17750 /* Table for the ix86 builtin decls. */
17751 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17753 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17754 * if the target_flags include one of MASK. Stores the function decl
17755 * in the ix86_builtins array.
17756 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17759 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17761 tree decl = NULL_TREE;
17763 if (mask & ix86_isa_flags
17764 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17766 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17768 ix86_builtins[(int) code] = decl;
17774 /* Like def_builtin, but also marks the function decl "const". */
17777 def_builtin_const (int mask, const char *name, tree type,
17778 enum ix86_builtins code)
17780 tree decl = def_builtin (mask, name, type, code);
17782 TREE_READONLY (decl) = 1;
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
17792 struct builtin_description
17794 const unsigned int mask;
17795 const enum insn_code icode;
17796 const char *const name;
17797 const enum ix86_builtins code;
17798 const enum rtx_code comparison;
17802 static const struct builtin_description bdesc_comi[] =
17804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17805 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17806 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17807 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17808 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17810 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17811 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17812 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17813 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17814 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17815 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17817 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17822 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17825 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17830 static const struct builtin_description bdesc_ptest[] =
17833 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17834 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17835 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17838 static const struct builtin_description bdesc_pcmpestr[] =
17841 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17842 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17843 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17844 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17845 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17846 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17847 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17850 static const struct builtin_description bdesc_pcmpistr[] =
17853 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17854 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17855 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17856 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17857 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17858 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17859 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17862 static const struct builtin_description bdesc_crc32[] =
17865 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17866 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17867 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17868 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17871 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
17872 static const struct builtin_description bdesc_sse_3arg[] =
17875 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17876 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17877 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17878 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17879 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17880 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17881 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17882 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17883 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17884 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17885 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17886 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17889 static const struct builtin_description bdesc_2arg[] =
17892 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17893 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17894 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17915 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17916 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17918 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17919 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17920 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17921 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17924 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17925 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17926 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17927 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17929 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17931 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17932 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17941 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17944 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17948 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17956 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17961 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17968 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17969 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17978 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17979 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17980 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17981 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17983 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17984 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17985 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17986 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17988 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17991 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17992 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17993 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17997 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17999 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
18000 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
18001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
18002 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
18003 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
18004 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
18006 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
18007 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
18008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
18009 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
18010 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
18011 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
18013 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
18014 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
18015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
18016 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
18018 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
18019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
18022 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
18023 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
18024 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
18025 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
18026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
18027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
18028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
18029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
18031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
18032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
18033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
18034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
18035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
18036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
18037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
18038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
18039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
18040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
18043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
18044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
18045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
18046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
18047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
18048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
18049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
18050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
18052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
18053 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
18054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
18055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
18057 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
18058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
18059 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
18060 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
18062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
18063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
18064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
18066 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
18069 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
18070 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
18071 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
18072 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
18073 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
18074 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
18075 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
18076 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
/* 128-bit saturating add/subtract (paddsb/paddsw/paddusb/paddusw and
   the psub counterparts).  These operate on XMM registers via the
   CODE_FOR_sse2_* patterns and carry the "128" suffix, so they are
   SSE2 instructions: gate them on OPTION_MASK_ISA_SSE2, not the
   previous (incorrect) OPTION_MASK_ISA_MMX, which would have made
   them available without SSE2 and unavailable under -mno-mmx -msse2.  */
18078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
18080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
18081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
18082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
18083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
18084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
18085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
/* SSE2: 16-bit multiplies, full-width logical ops, averages, integer
   compares, unsigned/signed max-min, unpacks, packs, shifts and
   scalar conversions.  Entries with a NULL (0) name are presumably
   registered with an explicit name elsewhere -- verify against the
   builtin-init code.  */
18087 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
18088 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
18090 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
18091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
18092 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
18093 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
18095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
18096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
18098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
18099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
18100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
18101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
18102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
18103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
18105 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
18106 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
18107 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
18108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
18110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
18111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
18112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
18113 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
18114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
18115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
18116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
18117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
18119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
18120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
18121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
18123 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
18124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
18126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
18127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
18129 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
18130 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
18131 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
18133 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
18134 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
18135 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
18137 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
18138 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
18140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
/* Scalar conversions; the 64-bit variants additionally require
   OPTION_MASK_ISA_64BIT.  */
18142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
18143 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
18144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
18145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
/* SSE3: addsub and horizontal add/subtract on packed float/double.  */
18148 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
18149 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
18150 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
18151 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
18152 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
18153 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
/* SSSE3: horizontal add/sub, multiply-add, byte shuffle and sign
   operations.  Each comes in a 128-bit (XMM) and a 64-bit (MMX-reg)
   flavor; the latter has no "128" suffix.  */
18156 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
18157 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
18158 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
18159 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
18160 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
18161 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
18162 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
18163 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
18164 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
18165 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
18166 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
18167 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18168 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18169 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18170 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18171 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18172 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18173 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18174 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18175 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18176 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18177 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18178 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18179 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
/* SSE4.1: pack, 64-bit equality compare, widened max/min, and 32-bit
   multiplies.  */
18182 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18183 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18184 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18185 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18186 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18187 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18188 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18189 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18190 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18191 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18192 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18193 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
/* SSE4.2: 64-bit signed greater-than compare.  */
18196 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
/* Builtins taking a single operand: movemasks, square roots,
   reciprocal estimates, and the large family of conversions.  Entries
   with a NULL (0) name are presumably given their user-visible name
   where the builtins are registered -- verify against the init code.  */
18199 static const struct builtin_description bdesc_1arg[] =
18201 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18202 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18204 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18205 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 },
18206 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18207 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 },
18208 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18212 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18215 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18235 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18236 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
/* SSE3 duplication moves.  */
18243 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18244 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
/* SSSE3 absolute values, in 128-bit and 64-bit forms.  */
18247 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18248 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18249 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18250 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18251 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18252 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
/* SSE4.1 sign/zero extensions and horizontal min.  */
18255 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18256 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18257 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18258 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18259 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18260 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18261 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18262 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18263 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18264 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18265 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18266 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18267 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18269 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
18270 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18271 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
/* Operand-signature classification for the multi-argument (SSE5)
   builtins in bdesc_multi_arg below.  Encodes operand count, element
   mode, and variant (_IMM = immediate rotate count, _CMP = vector
   comparison).  NOTE(review): only a fragment of the enumerators is
   visible here.  */
18275 enum multi_arg_type {
18285 MULTI_ARG_3_PERMPS,
18286 MULTI_ARG_3_PERMPD,
18293 MULTI_ARG_2_DI_IMM,
18294 MULTI_ARG_2_SI_IMM,
18295 MULTI_ARG_2_HI_IMM,
18296 MULTI_ARG_2_QI_IMM,
18297 MULTI_ARG_2_SF_CMP,
18298 MULTI_ARG_2_DF_CMP,
18299 MULTI_ARG_2_DI_CMP,
18300 MULTI_ARG_2_SI_CMP,
18301 MULTI_ARG_2_HI_CMP,
18302 MULTI_ARG_2_QI_CMP,
/* SSE5 (AMD) builtins taking more than the usual one or two operands:
   fused multiply-add, conditional move (pcmov), permutes,
   multiply-accumulate, rotates/shifts, fraction extraction,
   half-precision conversion, horizontal add/sub, and the com*
   comparison family.  The fifth field is the rtx comparison code for
   _CMP entries; the last field is the multi_arg_type signature.  */
18325 static const struct builtin_description bdesc_multi_arg[] =
18327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
/* "__builtin_ia32_pcmov" is a deliberate alias of the v2di variant:
   both map to the same insn pattern and builtin code.  */
18343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18353 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
/* Rotates: variable-count (sse5_rotl*) and immediate-count (rotl*,
   _IMM signatures) forms.  */
18365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18369 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18370 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18371 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18372 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
/* Scalar float comparisons.  The "neq"/"uneq" spellings are
   deliberate aliases (same builtin enum and rtx code as the "ne"/
   "une" entries).  */
18403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* Scalar double comparisons.  */
18420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
/* Packed float comparisons.  */
18437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* Packed double comparisons.  */
18454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18477 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18496 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18497 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18498 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18499 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18517 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18523 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18524 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18525 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18527 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18528 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18529 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18530 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18531 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18532 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18533 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18535 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18536 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18537 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18538 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18539 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18540 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18541 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18542 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18544 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18545 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18546 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18547 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18548 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18549 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18550 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18551 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18553 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18554 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18555 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18556 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18557 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18558 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18559 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18560 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18563 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18564 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18567 ix86_init_mmx_sse_builtins (void)
18569 const struct builtin_description * d;
18572 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18573 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18574 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18575 tree V2DI_type_node
18576 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18577 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18578 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18579 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18580 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18581 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18582 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18584 tree pchar_type_node = build_pointer_type (char_type_node);
18585 tree pcchar_type_node = build_pointer_type (
18586 build_type_variant (char_type_node, 1, 0));
18587 tree pfloat_type_node = build_pointer_type (float_type_node);
18588 tree pcfloat_type_node = build_pointer_type (
18589 build_type_variant (float_type_node, 1, 0));
18590 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18591 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18592 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18595 tree int_ftype_v4sf_v4sf
18596 = build_function_type_list (integer_type_node,
18597 V4SF_type_node, V4SF_type_node, NULL_TREE);
18598 tree v4si_ftype_v4sf_v4sf
18599 = build_function_type_list (V4SI_type_node,
18600 V4SF_type_node, V4SF_type_node, NULL_TREE);
18601 /* MMX/SSE/integer conversions. */
18602 tree int_ftype_v4sf
18603 = build_function_type_list (integer_type_node,
18604 V4SF_type_node, NULL_TREE);
18605 tree int64_ftype_v4sf
18606 = build_function_type_list (long_long_integer_type_node,
18607 V4SF_type_node, NULL_TREE);
18608 tree int_ftype_v8qi
18609 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18610 tree v4sf_ftype_v4sf_int
18611 = build_function_type_list (V4SF_type_node,
18612 V4SF_type_node, integer_type_node, NULL_TREE);
18613 tree v4sf_ftype_v4sf_int64
18614 = build_function_type_list (V4SF_type_node,
18615 V4SF_type_node, long_long_integer_type_node,
18617 tree v4sf_ftype_v4sf_v2si
18618 = build_function_type_list (V4SF_type_node,
18619 V4SF_type_node, V2SI_type_node, NULL_TREE);
18621 /* Miscellaneous. */
18622 tree v8qi_ftype_v4hi_v4hi
18623 = build_function_type_list (V8QI_type_node,
18624 V4HI_type_node, V4HI_type_node, NULL_TREE);
18625 tree v4hi_ftype_v2si_v2si
18626 = build_function_type_list (V4HI_type_node,
18627 V2SI_type_node, V2SI_type_node, NULL_TREE);
18628 tree v4sf_ftype_v4sf_v4sf_int
18629 = build_function_type_list (V4SF_type_node,
18630 V4SF_type_node, V4SF_type_node,
18631 integer_type_node, NULL_TREE);
18632 tree v2si_ftype_v4hi_v4hi
18633 = build_function_type_list (V2SI_type_node,
18634 V4HI_type_node, V4HI_type_node, NULL_TREE);
18635 tree v4hi_ftype_v4hi_int
18636 = build_function_type_list (V4HI_type_node,
18637 V4HI_type_node, integer_type_node, NULL_TREE);
18638 tree v4hi_ftype_v4hi_di
18639 = build_function_type_list (V4HI_type_node,
18640 V4HI_type_node, long_long_unsigned_type_node,
18642 tree v2si_ftype_v2si_di
18643 = build_function_type_list (V2SI_type_node,
18644 V2SI_type_node, long_long_unsigned_type_node,
18646 tree void_ftype_void
18647 = build_function_type (void_type_node, void_list_node);
18648 tree void_ftype_unsigned
18649 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18650 tree void_ftype_unsigned_unsigned
18651 = build_function_type_list (void_type_node, unsigned_type_node,
18652 unsigned_type_node, NULL_TREE);
18653 tree void_ftype_pcvoid_unsigned_unsigned
18654 = build_function_type_list (void_type_node, const_ptr_type_node,
18655 unsigned_type_node, unsigned_type_node,
18657 tree unsigned_ftype_void
18658 = build_function_type (unsigned_type_node, void_list_node);
18659 tree v2si_ftype_v4sf
18660 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18661 /* Loads/stores. */
18662 tree void_ftype_v8qi_v8qi_pchar
18663 = build_function_type_list (void_type_node,
18664 V8QI_type_node, V8QI_type_node,
18665 pchar_type_node, NULL_TREE);
18666 tree v4sf_ftype_pcfloat
18667 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18668 /* @@@ the type is bogus */
18669 tree v4sf_ftype_v4sf_pv2si
18670 = build_function_type_list (V4SF_type_node,
18671 V4SF_type_node, pv2si_type_node, NULL_TREE);
18672 tree void_ftype_pv2si_v4sf
18673 = build_function_type_list (void_type_node,
18674 pv2si_type_node, V4SF_type_node, NULL_TREE);
18675 tree void_ftype_pfloat_v4sf
18676 = build_function_type_list (void_type_node,
18677 pfloat_type_node, V4SF_type_node, NULL_TREE);
18678 tree void_ftype_pdi_di
18679 = build_function_type_list (void_type_node,
18680 pdi_type_node, long_long_unsigned_type_node,
18682 tree void_ftype_pv2di_v2di
18683 = build_function_type_list (void_type_node,
18684 pv2di_type_node, V2DI_type_node, NULL_TREE);
18685 /* Normal vector unops. */
18686 tree v4sf_ftype_v4sf
18687 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18688 tree v16qi_ftype_v16qi
18689 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18690 tree v8hi_ftype_v8hi
18691 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18692 tree v4si_ftype_v4si
18693 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18694 tree v8qi_ftype_v8qi
18695 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18696 tree v4hi_ftype_v4hi
18697 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18699 /* Normal vector binops. */
18700 tree v4sf_ftype_v4sf_v4sf
18701 = build_function_type_list (V4SF_type_node,
18702 V4SF_type_node, V4SF_type_node, NULL_TREE);
18703 tree v8qi_ftype_v8qi_v8qi
18704 = build_function_type_list (V8QI_type_node,
18705 V8QI_type_node, V8QI_type_node, NULL_TREE);
18706 tree v4hi_ftype_v4hi_v4hi
18707 = build_function_type_list (V4HI_type_node,
18708 V4HI_type_node, V4HI_type_node, NULL_TREE);
18709 tree v2si_ftype_v2si_v2si
18710 = build_function_type_list (V2SI_type_node,
18711 V2SI_type_node, V2SI_type_node, NULL_TREE);
18712 tree di_ftype_di_di
18713 = build_function_type_list (long_long_unsigned_type_node,
18714 long_long_unsigned_type_node,
18715 long_long_unsigned_type_node, NULL_TREE);
18717 tree di_ftype_di_di_int
18718 = build_function_type_list (long_long_unsigned_type_node,
18719 long_long_unsigned_type_node,
18720 long_long_unsigned_type_node,
18721 integer_type_node, NULL_TREE);
18723 tree v2si_ftype_v2sf
18724 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18725 tree v2sf_ftype_v2si
18726 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18727 tree v2si_ftype_v2si
18728 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18729 tree v2sf_ftype_v2sf
18730 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18731 tree v2sf_ftype_v2sf_v2sf
18732 = build_function_type_list (V2SF_type_node,
18733 V2SF_type_node, V2SF_type_node, NULL_TREE);
18734 tree v2si_ftype_v2sf_v2sf
18735 = build_function_type_list (V2SI_type_node,
18736 V2SF_type_node, V2SF_type_node, NULL_TREE);
18737 tree pint_type_node = build_pointer_type (integer_type_node);
18738 tree pdouble_type_node = build_pointer_type (double_type_node);
18739 tree pcdouble_type_node = build_pointer_type (
18740 build_type_variant (double_type_node, 1, 0));
18741 tree int_ftype_v2df_v2df
18742 = build_function_type_list (integer_type_node,
18743 V2DF_type_node, V2DF_type_node, NULL_TREE);
18745 tree void_ftype_pcvoid
18746 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18747 tree v4sf_ftype_v4si
18748 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18749 tree v4si_ftype_v4sf
18750 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18751 tree v2df_ftype_v4si
18752 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18753 tree v4si_ftype_v2df
18754 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18755 tree v4si_ftype_v2df_v2df
18756 = build_function_type_list (V4SI_type_node,
18757 V2DF_type_node, V2DF_type_node, NULL_TREE);
18758 tree v2si_ftype_v2df
18759 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18760 tree v4sf_ftype_v2df
18761 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18762 tree v2df_ftype_v2si
18763 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18764 tree v2df_ftype_v4sf
18765 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18766 tree int_ftype_v2df
18767 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18768 tree int64_ftype_v2df
18769 = build_function_type_list (long_long_integer_type_node,
18770 V2DF_type_node, NULL_TREE);
18771 tree v2df_ftype_v2df_int
18772 = build_function_type_list (V2DF_type_node,
18773 V2DF_type_node, integer_type_node, NULL_TREE);
18774 tree v2df_ftype_v2df_int64
18775 = build_function_type_list (V2DF_type_node,
18776 V2DF_type_node, long_long_integer_type_node,
18778 tree v4sf_ftype_v4sf_v2df
18779 = build_function_type_list (V4SF_type_node,
18780 V4SF_type_node, V2DF_type_node, NULL_TREE);
18781 tree v2df_ftype_v2df_v4sf
18782 = build_function_type_list (V2DF_type_node,
18783 V2DF_type_node, V4SF_type_node, NULL_TREE);
18784 tree v2df_ftype_v2df_v2df_int
18785 = build_function_type_list (V2DF_type_node,
18786 V2DF_type_node, V2DF_type_node,
18789 tree v2df_ftype_v2df_pcdouble
18790 = build_function_type_list (V2DF_type_node,
18791 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18792 tree void_ftype_pdouble_v2df
18793 = build_function_type_list (void_type_node,
18794 pdouble_type_node, V2DF_type_node, NULL_TREE);
18795 tree void_ftype_pint_int
18796 = build_function_type_list (void_type_node,
18797 pint_type_node, integer_type_node, NULL_TREE);
18798 tree void_ftype_v16qi_v16qi_pchar
18799 = build_function_type_list (void_type_node,
18800 V16QI_type_node, V16QI_type_node,
18801 pchar_type_node, NULL_TREE);
18802 tree v2df_ftype_pcdouble
18803 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18804 tree v2df_ftype_v2df_v2df
18805 = build_function_type_list (V2DF_type_node,
18806 V2DF_type_node, V2DF_type_node, NULL_TREE);
18807 tree v16qi_ftype_v16qi_v16qi
18808 = build_function_type_list (V16QI_type_node,
18809 V16QI_type_node, V16QI_type_node, NULL_TREE);
18810 tree v8hi_ftype_v8hi_v8hi
18811 = build_function_type_list (V8HI_type_node,
18812 V8HI_type_node, V8HI_type_node, NULL_TREE);
18813 tree v4si_ftype_v4si_v4si
18814 = build_function_type_list (V4SI_type_node,
18815 V4SI_type_node, V4SI_type_node, NULL_TREE);
18816 tree v2di_ftype_v2di_v2di
18817 = build_function_type_list (V2DI_type_node,
18818 V2DI_type_node, V2DI_type_node, NULL_TREE);
18819 tree v2di_ftype_v2df_v2df
18820 = build_function_type_list (V2DI_type_node,
18821 V2DF_type_node, V2DF_type_node, NULL_TREE);
18822 tree v2df_ftype_v2df
18823 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18824 tree v2di_ftype_v2di_int
18825 = build_function_type_list (V2DI_type_node,
18826 V2DI_type_node, integer_type_node, NULL_TREE);
18827 tree v2di_ftype_v2di_v2di_int
18828 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18829 V2DI_type_node, integer_type_node, NULL_TREE);
18830 tree v4si_ftype_v4si_int
18831 = build_function_type_list (V4SI_type_node,
18832 V4SI_type_node, integer_type_node, NULL_TREE);
18833 tree v8hi_ftype_v8hi_int
18834 = build_function_type_list (V8HI_type_node,
18835 V8HI_type_node, integer_type_node, NULL_TREE);
18836 tree v4si_ftype_v8hi_v8hi
18837 = build_function_type_list (V4SI_type_node,
18838 V8HI_type_node, V8HI_type_node, NULL_TREE);
18839 tree di_ftype_v8qi_v8qi
18840 = build_function_type_list (long_long_unsigned_type_node,
18841 V8QI_type_node, V8QI_type_node, NULL_TREE);
18842 tree di_ftype_v2si_v2si
18843 = build_function_type_list (long_long_unsigned_type_node,
18844 V2SI_type_node, V2SI_type_node, NULL_TREE);
18845 tree v2di_ftype_v16qi_v16qi
18846 = build_function_type_list (V2DI_type_node,
18847 V16QI_type_node, V16QI_type_node, NULL_TREE);
18848 tree v2di_ftype_v4si_v4si
18849 = build_function_type_list (V2DI_type_node,
18850 V4SI_type_node, V4SI_type_node, NULL_TREE);
18851 tree int_ftype_v16qi
18852 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18853 tree v16qi_ftype_pcchar
18854 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18855 tree void_ftype_pchar_v16qi
18856 = build_function_type_list (void_type_node,
18857 pchar_type_node, V16QI_type_node, NULL_TREE);
18859 tree v2di_ftype_v2di_unsigned_unsigned
18860 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18861 unsigned_type_node, unsigned_type_node,
18863 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18864 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18865 unsigned_type_node, unsigned_type_node,
18867 tree v2di_ftype_v2di_v16qi
18868 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18870 tree v2df_ftype_v2df_v2df_v2df
18871 = build_function_type_list (V2DF_type_node,
18872 V2DF_type_node, V2DF_type_node,
18873 V2DF_type_node, NULL_TREE);
18874 tree v4sf_ftype_v4sf_v4sf_v4sf
18875 = build_function_type_list (V4SF_type_node,
18876 V4SF_type_node, V4SF_type_node,
18877 V4SF_type_node, NULL_TREE);
18878 tree v8hi_ftype_v16qi
18879 = build_function_type_list (V8HI_type_node, V16QI_type_node,
18881 tree v4si_ftype_v16qi
18882 = build_function_type_list (V4SI_type_node, V16QI_type_node,
18884 tree v2di_ftype_v16qi
18885 = build_function_type_list (V2DI_type_node, V16QI_type_node,
18887 tree v4si_ftype_v8hi
18888 = build_function_type_list (V4SI_type_node, V8HI_type_node,
18890 tree v2di_ftype_v8hi
18891 = build_function_type_list (V2DI_type_node, V8HI_type_node,
18893 tree v2di_ftype_v4si
18894 = build_function_type_list (V2DI_type_node, V4SI_type_node,
18896 tree v2di_ftype_pv2di
18897 = build_function_type_list (V2DI_type_node, pv2di_type_node,
18899 tree v16qi_ftype_v16qi_v16qi_int
18900 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18901 V16QI_type_node, integer_type_node,
18903 tree v16qi_ftype_v16qi_v16qi_v16qi
18904 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18905 V16QI_type_node, V16QI_type_node,
18907 tree v8hi_ftype_v8hi_v8hi_int
18908 = build_function_type_list (V8HI_type_node, V8HI_type_node,
18909 V8HI_type_node, integer_type_node,
18911 tree v4si_ftype_v4si_v4si_int
18912 = build_function_type_list (V4SI_type_node, V4SI_type_node,
18913 V4SI_type_node, integer_type_node,
18915 tree int_ftype_v2di_v2di
18916 = build_function_type_list (integer_type_node,
18917 V2DI_type_node, V2DI_type_node,
18919 tree int_ftype_v16qi_int_v16qi_int_int
18920 = build_function_type_list (integer_type_node,
18927 tree v16qi_ftype_v16qi_int_v16qi_int_int
18928 = build_function_type_list (V16QI_type_node,
18935 tree int_ftype_v16qi_v16qi_int
18936 = build_function_type_list (integer_type_node,
18942 /* SSE5 instructions */
18943 tree v2di_ftype_v2di_v2di_v2di
18944 = build_function_type_list (V2DI_type_node,
18950 tree v4si_ftype_v4si_v4si_v4si
18951 = build_function_type_list (V4SI_type_node,
18957 tree v4si_ftype_v4si_v4si_v2di
18958 = build_function_type_list (V4SI_type_node,
18964 tree v8hi_ftype_v8hi_v8hi_v8hi
18965 = build_function_type_list (V8HI_type_node,
18971 tree v8hi_ftype_v8hi_v8hi_v4si
18972 = build_function_type_list (V8HI_type_node,
18978 tree v2df_ftype_v2df_v2df_v16qi
18979 = build_function_type_list (V2DF_type_node,
18985 tree v4sf_ftype_v4sf_v4sf_v16qi
18986 = build_function_type_list (V4SF_type_node,
18992 tree v2di_ftype_v2di_si
18993 = build_function_type_list (V2DI_type_node,
18998 tree v4si_ftype_v4si_si
18999 = build_function_type_list (V4SI_type_node,
19004 tree v8hi_ftype_v8hi_si
19005 = build_function_type_list (V8HI_type_node,
19010 tree v16qi_ftype_v16qi_si
19011 = build_function_type_list (V16QI_type_node,
19015 tree v4sf_ftype_v4hi
19016 = build_function_type_list (V4SF_type_node,
19020 tree v4hi_ftype_v4sf
19021 = build_function_type_list (V4HI_type_node,
19025 tree v2di_ftype_v2di
19026 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19030 /* The __float80 type. */
19031 if (TYPE_MODE (long_double_type_node) == XFmode)
19032 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19036 /* The __float80 type. */
19037 tree float80_type_node = make_node (REAL_TYPE);
19039 TYPE_PRECISION (float80_type_node) = 80;
19040 layout_type (float80_type_node);
19041 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19047 tree float128_type_node = make_node (REAL_TYPE);
19049 TYPE_PRECISION (float128_type_node) = 128;
19050 layout_type (float128_type_node);
19051 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19054 /* TFmode support builtins. */
19055 ftype = build_function_type (float128_type_node,
19057 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19059 ftype = build_function_type_list (float128_type_node,
19060 float128_type_node,
19062 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19064 ftype = build_function_type_list (float128_type_node,
19065 float128_type_node,
19066 float128_type_node,
19068 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19071 /* Add all SSE builtins that are more or less simple operations on
19073 for (i = 0, d = bdesc_sse_3arg;
19074 i < ARRAY_SIZE (bdesc_sse_3arg);
19077 /* Use one of the operands; the target can have a different mode for
19078 mask-generating compares. */
19079 enum machine_mode mode;
19084 mode = insn_data[d->icode].operand[1].mode;
19089 type = v16qi_ftype_v16qi_v16qi_int;
19092 type = v8hi_ftype_v8hi_v8hi_int;
19095 type = v4si_ftype_v4si_v4si_int;
19098 type = v2di_ftype_v2di_v2di_int;
19101 type = v2df_ftype_v2df_v2df_int;
19104 type = v4sf_ftype_v4sf_v4sf_int;
19107 gcc_unreachable ();
19110 /* Override for variable blends. */
19113 case CODE_FOR_sse4_1_blendvpd:
19114 type = v2df_ftype_v2df_v2df_v2df;
19116 case CODE_FOR_sse4_1_blendvps:
19117 type = v4sf_ftype_v4sf_v4sf_v4sf;
19119 case CODE_FOR_sse4_1_pblendvb:
19120 type = v16qi_ftype_v16qi_v16qi_v16qi;
19126 def_builtin_const (d->mask, d->name, type, d->code);
19129 /* Add all builtins that are more or less simple operations on two
19131 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19133 /* Use one of the operands; the target can have a different mode for
19134 mask-generating compares. */
19135 enum machine_mode mode;
19140 mode = insn_data[d->icode].operand[1].mode;
19145 type = v16qi_ftype_v16qi_v16qi;
19148 type = v8hi_ftype_v8hi_v8hi;
19151 type = v4si_ftype_v4si_v4si;
19154 type = v2di_ftype_v2di_v2di;
19157 type = v2df_ftype_v2df_v2df;
19160 type = v4sf_ftype_v4sf_v4sf;
19163 type = v8qi_ftype_v8qi_v8qi;
19166 type = v4hi_ftype_v4hi_v4hi;
19169 type = v2si_ftype_v2si_v2si;
19172 type = di_ftype_di_di;
19176 gcc_unreachable ();
19179 /* Override for comparisons. */
19180 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19181 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19182 type = v4si_ftype_v4sf_v4sf;
19184 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19185 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19186 type = v2di_ftype_v2df_v2df;
19188 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19189 type = v4si_ftype_v2df_v2df;
19191 def_builtin_const (d->mask, d->name, type, d->code);
19194 /* Add all builtins that are more or less simple operations on 1 operand. */
19195 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19197 enum machine_mode mode;
19202 mode = insn_data[d->icode].operand[1].mode;
19207 type = v16qi_ftype_v16qi;
19210 type = v8hi_ftype_v8hi;
19213 type = v4si_ftype_v4si;
19216 type = v2df_ftype_v2df;
19219 type = v4sf_ftype_v4sf;
19222 type = v8qi_ftype_v8qi;
19225 type = v4hi_ftype_v4hi;
19228 type = v2si_ftype_v2si;
19235 def_builtin_const (d->mask, d->name, type, d->code);
19238 /* pcmpestr[im] insns. */
19239 for (i = 0, d = bdesc_pcmpestr;
19240 i < ARRAY_SIZE (bdesc_pcmpestr);
19243 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19244 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19246 ftype = int_ftype_v16qi_int_v16qi_int_int;
19247 def_builtin_const (d->mask, d->name, ftype, d->code);
19250 /* pcmpistr[im] insns. */
19251 for (i = 0, d = bdesc_pcmpistr;
19252 i < ARRAY_SIZE (bdesc_pcmpistr);
19255 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19256 ftype = v16qi_ftype_v16qi_v16qi_int;
19258 ftype = int_ftype_v16qi_v16qi_int;
19259 def_builtin_const (d->mask, d->name, ftype, d->code);
19262 /* Add the remaining MMX insns with somewhat more complicated types. */
19263 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19264 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
19265 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
19266 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
19268 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
19269 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
19270 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
19272 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
19273 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
19275 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19276 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19278 /* comi/ucomi insns. */
19279 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19280 if (d->mask == OPTION_MASK_ISA_SSE2)
19281 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19283 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19286 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19287 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19289 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19290 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19291 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19293 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19294 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19295 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19296 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19297 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19298 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19299 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19300 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19301 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19302 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19303 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19305 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19307 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19308 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19310 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19311 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19312 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19313 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19315 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19316 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19317 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19318 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19320 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19322 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19324 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19325 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19326 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19327 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR);
19328 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19329 ftype = build_function_type_list (float_type_node,
19332 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19333 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19334 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR);
19335 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19337 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
19339 /* Original 3DNow! */
19340 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19341 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19342 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19343 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19344 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19345 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19346 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19347 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19348 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19349 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19350 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19351 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19352 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19353 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19354 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19355 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19356 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19357 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19358 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19359 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19361 /* 3DNow! extension as used in the Athlon CPU. */
19362 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19363 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19364 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19365 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19366 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19367 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19370 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19372 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19373 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19375 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19376 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19378 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19379 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19380 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19381 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19382 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19384 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19385 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19386 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19387 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19389 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19390 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19392 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
19394 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19395 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19397 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19398 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19399 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19400 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19401 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19403 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19405 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19406 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19407 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19408 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19410 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19411 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19412 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19414 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19415 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19416 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19417 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19419 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19420 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19421 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19423 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19424 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19426 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19427 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19429 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19430 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19431 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19432 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19433 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19434 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19435 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19437 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19438 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19439 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19440 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19441 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19442 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19443 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19445 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19446 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19447 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19448 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19450 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19452 /* Prescott New Instructions. */
19453 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19454 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19455 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
19458 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19459 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19462 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19463 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19464 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19465 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19466 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19467 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19468 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19469 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19470 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19471 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19472 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19473 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19474 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19475 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19477 /* SSE4.1 and SSE5 */
19478 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19479 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19480 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
19481 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
19484 ftype = build_function_type_list (unsigned_type_node,
19485 unsigned_type_node,
19486 unsigned_char_type_node,
19488 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19489 ftype = build_function_type_list (unsigned_type_node,
19490 unsigned_type_node,
19491 short_unsigned_type_node,
19493 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19494 ftype = build_function_type_list (unsigned_type_node,
19495 unsigned_type_node,
19496 unsigned_type_node,
19498 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19499 ftype = build_function_type_list (long_long_unsigned_type_node,
19500 long_long_unsigned_type_node,
19501 long_long_unsigned_type_node,
19503 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
19505 /* AMDFAM10 SSE4A New built-ins */
19506 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19507 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19508 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19509 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19510 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19511 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19513 /* Access to the vec_init patterns. */
19514 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19515 integer_type_node, NULL_TREE);
19516 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19518 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19519 short_integer_type_node,
19520 short_integer_type_node,
19521 short_integer_type_node, NULL_TREE);
19522 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19524 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19525 char_type_node, char_type_node,
19526 char_type_node, char_type_node,
19527 char_type_node, char_type_node,
19528 char_type_node, NULL_TREE);
19529 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19531 /* Access to the vec_extract patterns. */
19532 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19533 integer_type_node, NULL_TREE);
19534 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19536 ftype = build_function_type_list (long_long_integer_type_node,
19537 V2DI_type_node, integer_type_node,
19539 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19541 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19542 integer_type_node, NULL_TREE);
19543 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19545 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19546 integer_type_node, NULL_TREE);
19547 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19549 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19550 integer_type_node, NULL_TREE);
19551 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19553 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19554 integer_type_node, NULL_TREE);
19555 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19557 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19558 integer_type_node, NULL_TREE);
19559 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19561 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19562 integer_type_node, NULL_TREE);
19563 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
19565 /* Access to the vec_set patterns. */
19566 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19568 integer_type_node, NULL_TREE);
19569 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19571 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19573 integer_type_node, NULL_TREE);
19574 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19576 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19578 integer_type_node, NULL_TREE);
19579 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19581 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19583 integer_type_node, NULL_TREE);
19584 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19586 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19588 integer_type_node, NULL_TREE);
19589 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19591 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19593 integer_type_node, NULL_TREE);
19594 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
19596 /* Add SSE5 multi-arg argument instructions */
19597 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19599 tree mtype = NULL_TREE;
19604 switch ((enum multi_arg_type)d->flag)
19606 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19607 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19608 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19609 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19610 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19611 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19612 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19613 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19614 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19615 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19616 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19617 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19618 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19619 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19620 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19621 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19622 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19623 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19624 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19625 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19626 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19627 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19628 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19629 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19630 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19631 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19632 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19633 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19634 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19635 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19636 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19637 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19638 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19639 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19640 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19641 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19642 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19643 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19644 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19645 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19646 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19647 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19648 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19649 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19650 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19651 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19652 case MULTI_ARG_UNKNOWN:
19654 gcc_unreachable ();
19658 def_builtin_const (d->mask, d->name, mtype, d->code);
/* Target hook: create all ix86 builtin function decls.  Only the call
   into the MMX/SSE initializer is visible in this excerpt; the function's
   return type, braces and any other initialization lie in elided lines.
   NOTE(review): sampled listing — confirm full body against i386.c.  */
19663 ix86_init_builtins (void)
19666 ix86_init_mmx_sse_builtins ();
19669 /* Errors in the source file can cause expand_expr to return const0_rtx
19670 where we expect a vector. To avoid crashing, use one of the vector
19671 clear instructions. */
/* X: rtx that should be a vector value; MODE: the vector mode expected.
   Replaces a scalar const0_rtx with the all-zero vector constant of MODE
   so later predicate checks and insn emission do not crash.  The `return x;`
   and enclosing braces are in elided lines of this listing.  */
19673 safe_vector_operand (rtx x, enum machine_mode mode)
19675 if (x == const0_rtx)
19676 x = CONST0_RTX (mode);
19680 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19681 4 operands. The third argument must be a constant smaller than 8
/* ICODE: insn code to emit; EXP: the CALL_EXPR; TARGET presumably the
   suggested result rtx (its declaration is in elided lines).
   Expands a 3-argument SSE4.1 builtin where operand 3 is either a
   register (blendv forms) or a small immediate (round/blend forms).  */
19685 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
19689 tree arg0 = CALL_EXPR_ARG (exp, 0);
19690 tree arg1 = CALL_EXPR_ARG (exp, 1);
19691 tree arg2 = CALL_EXPR_ARG (exp, 2);
19692 rtx op0 = expand_normal (arg0);
19693 rtx op1 = expand_normal (arg1);
19694 rtx op2 = expand_normal (arg2);
19695 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19696 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19697 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19698 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand).  */
19700 if (VECTOR_MODE_P (mode1))
19701 op0 = safe_vector_operand (op0, mode1);
19702 if (VECTOR_MODE_P (mode2))
19703 op1 = safe_vector_operand (op1, mode2);
19704 if (VECTOR_MODE_P (mode3))
19705 op2 = safe_vector_operand (op2, mode3);
/* Allocate a fresh pseudo when TARGET is unusable for operand 0.  */
19709 || GET_MODE (target) != tmode
19710 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19711 target = gen_reg_rtx (tmode);
19713 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19714 op0 = copy_to_mode_reg (mode1, op0);
19715 if ((optimize && !register_operand (op1, mode2))
19716 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19717 op1 = copy_to_mode_reg (mode2, op1);
/* Operand 3 failed its predicate: for blendv insns it may simply be
   forced into a register; for round insns it must have been a 4-bit
   immediate, otherwise an 8-bit immediate (switch header elided).  */
19719 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19722 case CODE_FOR_sse4_1_blendvpd:
19723 case CODE_FOR_sse4_1_blendvps:
19724 case CODE_FOR_sse4_1_pblendvb:
19725 op2 = copy_to_mode_reg (mode3, op2);
19728 case CODE_FOR_sse4_1_roundsd:
19729 case CODE_FOR_sse4_1_roundss:
19730 error ("the third argument must be a 4-bit immediate");
19734 error ("the third argument must be an 8-bit immediate");
19738 pat = GEN_FCN (icode) (target, op0, op1, op2);
19745 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
/* ICODE: the crc32 insn; EXP: the CALL_EXPR; TARGET: suggested result.
   Returns TARGET holding the accumulated CRC (final emit/return lines
   are elided in this listing).  */
19748 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19751 tree arg0 = CALL_EXPR_ARG (exp, 0);
19752 tree arg1 = CALL_EXPR_ARG (exp, 1);
19753 rtx op0 = expand_normal (arg0);
19754 rtx op1 = expand_normal (arg1);
19755 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19756 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19757 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19761 || GET_MODE (target) != tmode
19762 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19763 target = gen_reg_rtx (tmode);
19765 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19766 op0 = copy_to_mode_reg (mode0, op0);
/* OP1 failed the predicate: force it into a register, then take the
   low part in MODE1 via a subreg (the data operand may be narrower
   than the register it was expanded into).  */
19767 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19769 op1 = copy_to_reg (op1);
19770 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19773 pat = GEN_FCN (icode) (target, op0, op1);
19780 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE: two-input insn; EXP: the CALL_EXPR; TARGET: suggested result.
   Expands target, op0, op1 and emits ICODE, with special handling for
   an SImode source feeding a TImode operand (psll/psrl-style shifts).  */
19783 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19786 tree arg0 = CALL_EXPR_ARG (exp, 0);
19787 tree arg1 = CALL_EXPR_ARG (exp, 1);
19788 rtx op0 = expand_normal (arg0);
19789 rtx op1 = expand_normal (arg1);
19790 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19791 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19792 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19794 if (VECTOR_MODE_P (mode0))
19795 op0 = safe_vector_operand (op0, mode0);
19796 if (VECTOR_MODE_P (mode1))
19797 op1 = safe_vector_operand (op1, mode1);
19799 if (optimize || !target
19800 || GET_MODE (target) != tmode
19801 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19802 target = gen_reg_rtx (tmode);
/* An SImode value destined for a TImode operand: load it into a V4SI
   register with movd semantics, then view that register as TImode.  */
19804 if (GET_MODE (op1) == SImode && mode1 == TImode)
19806 rtx x = gen_reg_rtx (V4SImode);
19807 emit_insn (gen_sse2_loadd (x, op1));
19808 op1 = gen_lowpart (TImode, x);
19811 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19812 op0 = copy_to_mode_reg (mode0, op0);
19813 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19814 op1 = copy_to_mode_reg (mode1, op1);
19816 /* ??? Using ix86_fixup_binary_operands is problematic when
19817 we've got mismatched modes. Fake it. */
/* Same-mode case: let the generic binary fixup canonicalize operands;
   otherwise force both into registers when the operator check fails.  */
19823 if (tmode == mode0 && tmode == mode1)
19825 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
19829 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
19831 op0 = force_reg (mode0, op0);
19832 op1 = force_reg (mode1, op1);
19833 target = gen_reg_rtx (tmode);
19836 pat = GEN_FCN (icode) (target, op0, op1);
19843 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* ICODE: insn to emit; EXP: CALL_EXPR; TARGET: suggested result;
   M_TYPE: classifies argument count/shape (see MULTI_ARG_* cases);
   SUB_CODE: comparison or sub-operation code folded into the pattern.
   Used for the SSE5 multi-argument builtins registered above.  */
19846 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
19847 enum multi_arg_type m_type,
19848 enum insn_code sub_code)
19853 bool comparison_p = false;
19855 bool last_arg_constant = false;
19856 int num_memory = 0;
19859 enum machine_mode mode;
19862 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: 3-operand forms (nargs assignment elided) ...  */
19866 case MULTI_ARG_3_SF:
19867 case MULTI_ARG_3_DF:
19868 case MULTI_ARG_3_DI:
19869 case MULTI_ARG_3_SI:
19870 case MULTI_ARG_3_SI_DI:
19871 case MULTI_ARG_3_HI:
19872 case MULTI_ARG_3_HI_SI:
19873 case MULTI_ARG_3_QI:
19874 case MULTI_ARG_3_PERMPS:
19875 case MULTI_ARG_3_PERMPD:
/* ... plain 2-operand forms ...  */
19879 case MULTI_ARG_2_SF:
19880 case MULTI_ARG_2_DF:
19881 case MULTI_ARG_2_DI:
19882 case MULTI_ARG_2_SI:
19883 case MULTI_ARG_2_HI:
19884 case MULTI_ARG_2_QI:
/* ... 2-operand forms whose last argument must be an immediate ...  */
19888 case MULTI_ARG_2_DI_IMM:
19889 case MULTI_ARG_2_SI_IMM:
19890 case MULTI_ARG_2_HI_IMM:
19891 case MULTI_ARG_2_QI_IMM:
19893 last_arg_constant = true;
/* ... unary forms ...  */
19896 case MULTI_ARG_1_SF:
19897 case MULTI_ARG_1_DF:
19898 case MULTI_ARG_1_DI:
19899 case MULTI_ARG_1_SI:
19900 case MULTI_ARG_1_HI:
19901 case MULTI_ARG_1_QI:
19902 case MULTI_ARG_1_SI_DI:
19903 case MULTI_ARG_1_HI_DI:
19904 case MULTI_ARG_1_HI_SI:
19905 case MULTI_ARG_1_QI_DI:
19906 case MULTI_ARG_1_QI_SI:
19907 case MULTI_ARG_1_QI_HI:
19908 case MULTI_ARG_1_PH2PS:
19909 case MULTI_ARG_1_PS2PH:
/* ... comparison forms: the pattern embeds a comparison rtx built
   from SUB_CODE (see the gen_rtx_fmt_ee call below) ...  */
19913 case MULTI_ARG_2_SF_CMP:
19914 case MULTI_ARG_2_DF_CMP:
19915 case MULTI_ARG_2_DI_CMP:
19916 case MULTI_ARG_2_SI_CMP:
19917 case MULTI_ARG_2_HI_CMP:
19918 case MULTI_ARG_2_QI_CMP:
19920 comparison_p = true;
/* ... test forms (handling between cases is elided in this listing).  */
19923 case MULTI_ARG_2_SF_TF:
19924 case MULTI_ARG_2_DF_TF:
19925 case MULTI_ARG_2_DI_TF:
19926 case MULTI_ARG_2_SI_TF:
19927 case MULTI_ARG_2_HI_TF:
19928 case MULTI_ARG_2_QI_TF:
19933 case MULTI_ARG_UNKNOWN:
19935 gcc_unreachable ();
19938 if (optimize || !target
19939 || GET_MODE (target) != tmode
19940 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19941 target = gen_reg_rtx (tmode);
19943 gcc_assert (nargs <= 4);
/* Expand each argument; comparison patterns shift operand indices by
   one because operand 1 is the embedded comparison rtx.  */
19945 for (i = 0; i < nargs; i++)
19947 tree arg = CALL_EXPR_ARG (exp, i);
19948 rtx op = expand_normal (arg);
19949 int adjust = (comparison_p) ? 1 : 0;
19950 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
19952 if (last_arg_constant && i == nargs-1)
19954 if (GET_CODE (op) != CONST_INT)
19956 error ("last argument must be an immediate");
19957 return gen_reg_rtx (tmode);
19962 if (VECTOR_MODE_P (mode))
19963 op = safe_vector_operand (op, mode);
19965 /* If we aren't optimizing, only allow one memory operand to be
19967 if (memory_operand (op, mode))
19970 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
19973 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
19975 op = force_reg (mode, op);
19979 args[i].mode = mode;
/* Emit the pattern; arity dispatch (switch on nargs) is partly elided.  */
19985 pat = GEN_FCN (icode) (target, args[0].op);
19990 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19991 GEN_INT ((int)sub_code));
19992 else if (! comparison_p)
19993 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19996 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20000 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20005 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20009 gcc_unreachable ();
20019 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* ICODE: store insn; EXP: CALL_EXPR whose arg0 is the destination
   address and arg1 the value.  Builds a MEM for the destination,
   forces the value into a register, and emits the store.  */
20022 ix86_expand_store_builtin (enum insn_code icode, tree exp)
20025 tree arg0 = CALL_EXPR_ARG (exp, 0);
20026 tree arg1 = CALL_EXPR_ARG (exp, 1);
20027 rtx op0 = expand_normal (arg0);
20028 rtx op1 = expand_normal (arg1);
20029 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
20030 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
20032 if (VECTOR_MODE_P (mode1))
20033 op1 = safe_vector_operand (op1, mode1);
/* arg0 is a pointer: wrap it in a MEM of the store's operand mode.  */
20035 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20036 op1 = copy_to_mode_reg (mode1, op1);
20038 pat = GEN_FCN (icode) (op0, op1);
20044 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* ICODE: one-input insn; EXP: CALL_EXPR; TARGET: suggested result;
   DO_LOAD: nonzero when arg0 is a pointer to be dereferenced (loads).
   sse4.1 roundpd/roundps additionally take a 4-bit immediate arg1.  */
20047 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
20048 rtx target, int do_load)
20051 tree arg0 = CALL_EXPR_ARG (exp, 0);
20052 rtx op0 = expand_normal (arg0);
20053 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20054 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20056 if (optimize || !target
20057 || GET_MODE (target) != tmode
20058 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20059 target = gen_reg_rtx (tmode);
/* do_load branch (its `if` header is elided): treat op0 as an address.  */
20061 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20064 if (VECTOR_MODE_P (mode0))
20065 op0 = safe_vector_operand (op0, mode0);
20067 if ((optimize && !register_operand (op0, mode0))
20068 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20069 op0 = copy_to_mode_reg (mode0, op0);
/* Special cases of the ICODE switch (header elided): the packed round
   insns carry a rounding-mode immediate as a second argument.  */
20074 case CODE_FOR_sse4_1_roundpd:
20075 case CODE_FOR_sse4_1_roundps:
20077 tree arg1 = CALL_EXPR_ARG (exp, 1);
20078 rtx op1 = expand_normal (arg1);
20079 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20081 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20083 error ("the second argument must be a 4-bit immediate");
20086 pat = GEN_FCN (icode) (target, op0, op1);
/* Default: plain unary pattern.  */
20090 pat = GEN_FCN (icode) (target, op0);
20100 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20101 sqrtss, rsqrtss, rcpss. */
/* These vm* patterns take the same value twice (operands 1 and 2): the
   scalar op is applied to the low element and the upper elements pass
   through — hence op1 is derived from op0 (assignment elided here).  */
20104 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
20107 tree arg0 = CALL_EXPR_ARG (exp, 0);
20108 rtx op1, op0 = expand_normal (arg0);
20109 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20110 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20112 if (optimize || !target
20113 || GET_MODE (target) != tmode
20114 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20115 target = gen_reg_rtx (tmode);
20117 if (VECTOR_MODE_P (mode0))
20118 op0 = safe_vector_operand (op0, mode0);
20120 if ((optimize && !register_operand (op0, mode0))
20121 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20122 op0 = copy_to_mode_reg (mode0, op0);
20125 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20126 op1 = copy_to_mode_reg (mode0, op1);
20128 pat = GEN_FCN (icode) (target, op0, op1);
20135 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D: builtin descriptor (icode, comparison code, flags); EXP: the call.
   Emits a cmpps/cmppd-style pattern whose operand is a comparison rtx
   built from D->comparison over the two vector inputs.  */
20138 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
20142 tree arg0 = CALL_EXPR_ARG (exp, 0);
20143 tree arg1 = CALL_EXPR_ARG (exp, 1);
20144 rtx op0 = expand_normal (arg0);
20145 rtx op1 = expand_normal (arg1);
20147 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20148 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20149 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20150 enum rtx_code comparison = d->comparison;
20152 if (VECTOR_MODE_P (mode0))
20153 op0 = safe_vector_operand (op0, mode0);
20154 if (VECTOR_MODE_P (mode1))
20155 op1 = safe_vector_operand (op1, mode1);
20157 /* Swap operands if we have a comparison that isn't available in
20159 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20161 rtx tmp = gen_reg_rtx (mode1);
20162 emit_move_insn (tmp, op1);
/* The remaining swap moves (op1 = op0; op0 = tmp) are elided here.  */
20167 if (optimize || !target
20168 || GET_MODE (target) != tmode
20169 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20170 target = gen_reg_rtx (tmode);
20172 if ((optimize && !register_operand (op0, mode0))
20173 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20174 op0 = copy_to_mode_reg (mode0, op0);
20175 if ((optimize && !register_operand (op1, mode1))
20176 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20177 op1 = copy_to_mode_reg (mode1, op1);
/* op2 carries the comparison itself as an rtx operand of the pattern.  */
20179 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20180 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20187 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Emits a comiss/comisd-style flag-setting compare, then materializes
   the boolean result by setting the low QImode part of an SImode pseudo
   from the flags register; returns that SImode pseudo.  */
20190 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20194 tree arg0 = CALL_EXPR_ARG (exp, 0);
20195 tree arg1 = CALL_EXPR_ARG (exp, 1);
20196 rtx op0 = expand_normal (arg0);
20197 rtx op1 = expand_normal (arg1);
20198 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20199 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20200 enum rtx_code comparison = d->comparison;
20202 if (VECTOR_MODE_P (mode0))
20203 op0 = safe_vector_operand (op0, mode0);
20204 if (VECTOR_MODE_P (mode1))
20205 op1 = safe_vector_operand (op1, mode1);
20207 /* Swap operands if we have a comparison that isn't available in
20209 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the full SImode result first so the high 24 bits are defined,
   then write only the low byte through a STRICT_LOW_PART below.  */
20216 target = gen_reg_rtx (SImode);
20217 emit_move_insn (target, const0_rtx);
20218 target = gen_rtx_SUBREG (QImode, target, 0);
20220 if ((optimize && !register_operand (op0, mode0))
20221 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20222 op0 = copy_to_mode_reg (mode0, op0);
20223 if ((optimize && !register_operand (op1, mode1))
20224 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20225 op1 = copy_to_mode_reg (mode1, op1);
20227 pat = GEN_FCN (d->icode) (op0, op1);
20231 emit_insn (gen_rtx_SET (VOIDmode,
20232 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20233 gen_rtx_fmt_ee (comparison, QImode,
/* Comparison source operands (flags register) are on elided lines.  */
20237 return SUBREG_REG (target);
20240 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Same result-materialization scheme as ix86_expand_sse_comi: emit the
   flag-setting ptest pattern, then set the low byte of a zeroed SImode
   pseudo from D->comparison over the flags, returning the SImode reg.  */
20243 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20247 tree arg0 = CALL_EXPR_ARG (exp, 0);
20248 tree arg1 = CALL_EXPR_ARG (exp, 1);
20249 rtx op0 = expand_normal (arg0);
20250 rtx op1 = expand_normal (arg1);
20251 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20252 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20253 enum rtx_code comparison = d->comparison;
20255 if (VECTOR_MODE_P (mode0))
20256 op0 = safe_vector_operand (op0, mode0);
20257 if (VECTOR_MODE_P (mode1))
20258 op1 = safe_vector_operand (op1, mode1);
20260 target = gen_reg_rtx (SImode);
20261 emit_move_insn (target, const0_rtx);
20262 target = gen_rtx_SUBREG (QImode, target, 0);
20264 if ((optimize && !register_operand (op0, mode0))
20265 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20266 op0 = copy_to_mode_reg (mode0, op0);
20267 if ((optimize && !register_operand (op1, mode1))
20268 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20269 op1 = copy_to_mode_reg (mode1, op1);
20271 pat = GEN_FCN (d->icode) (op0, op1);
20275 emit_insn (gen_rtx_SET (VOIDmode,
20276 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20277 gen_rtx_fmt_ee (comparison, QImode,
20281 return SUBREG_REG (target);
20284 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Five-argument SSE4.2 explicit-length string compare.  The pattern has
   two outputs (index in tmode0, mask in tmode1); whichever one the
   builtin does not return is sent to a scratch pseudo.  The flag-reading
   variants (neither *I128 nor *M128) return a flag bit via the zeroed
   SImode / STRICT_LOW_PART scheme at the end.  */
20287 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20288 tree exp, rtx target)
20291 tree arg0 = CALL_EXPR_ARG (exp, 0);
20292 tree arg1 = CALL_EXPR_ARG (exp, 1);
20293 tree arg2 = CALL_EXPR_ARG (exp, 2);
20294 tree arg3 = CALL_EXPR_ARG (exp, 3);
20295 tree arg4 = CALL_EXPR_ARG (exp, 4);
20296 rtx scratch0, scratch1;
20297 rtx op0 = expand_normal (arg0);
20298 rtx op1 = expand_normal (arg1);
20299 rtx op2 = expand_normal (arg2);
20300 rtx op3 = expand_normal (arg3);
20301 rtx op4 = expand_normal (arg4);
20302 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20304 tmode0 = insn_data[d->icode].operand[0].mode;
20305 tmode1 = insn_data[d->icode].operand[1].mode;
20306 modev2 = insn_data[d->icode].operand[2].mode;
20307 modei3 = insn_data[d->icode].operand[3].mode;
20308 modev4 = insn_data[d->icode].operand[4].mode;
20309 modei5 = insn_data[d->icode].operand[5].mode;
20310 modeimm = insn_data[d->icode].operand[6].mode;
20312 if (VECTOR_MODE_P (modev2))
20313 op0 = safe_vector_operand (op0, modev2);
20314 if (VECTOR_MODE_P (modev4))
20315 op2 = safe_vector_operand (op2, modev4);
20317 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20318 op0 = copy_to_mode_reg (modev2, op0);
20319 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20320 op1 = copy_to_mode_reg (modei3, op1);
20321 if ((optimize && !register_operand (op2, modev4))
20322 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20323 op2 = copy_to_mode_reg (modev4, op2);
20324 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20325 op3 = copy_to_mode_reg (modei5, op3);
20327 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20329 error ("the fifth argument must be a 8-bit immediate");
20333 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20335 if (optimize || !target
20336 || GET_MODE (target) != tmode0
20337 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20338 target = gen_reg_rtx (tmode0);
20340 scratch1 = gen_reg_rtx (tmode1);
20342 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20344 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20346 if (optimize || !target
20347 || GET_MODE (target) != tmode1
20348 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20349 target = gen_reg_rtx (tmode1);
20351 scratch0 = gen_reg_rtx (tmode0);
20353 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-returning variant: both real outputs go to scratch pseudos and
   D->flag names the flags register consumed below.  */
20357 gcc_assert (d->flag);
20359 scratch0 = gen_reg_rtx (tmode0);
20360 scratch1 = gen_reg_rtx (tmode1);
20362 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20372 target = gen_reg_rtx (SImode);
20373 emit_move_insn (target, const0_rtx);
20374 target = gen_rtx_SUBREG (QImode, target, 0);
20377 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20378 gen_rtx_fmt_ee (EQ, QImode,
20379 gen_rtx_REG ((enum machine_mode) d->flag,
20382 return SUBREG_REG (target);
20389 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Three-argument SSE4.2 implicit-length string compare; same structure
   as ix86_expand_sse_pcmpestr but without the two explicit length
   operands: index output, mask output, or flag-bit result depending
   on D->code / D->flag.  */
20392 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20393 tree exp, rtx target)
20396 tree arg0 = CALL_EXPR_ARG (exp, 0);
20397 tree arg1 = CALL_EXPR_ARG (exp, 1);
20398 tree arg2 = CALL_EXPR_ARG (exp, 2);
20399 rtx scratch0, scratch1;
20400 rtx op0 = expand_normal (arg0);
20401 rtx op1 = expand_normal (arg1);
20402 rtx op2 = expand_normal (arg2);
20403 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20405 tmode0 = insn_data[d->icode].operand[0].mode;
20406 tmode1 = insn_data[d->icode].operand[1].mode;
20407 modev2 = insn_data[d->icode].operand[2].mode;
20408 modev3 = insn_data[d->icode].operand[3].mode;
20409 modeimm = insn_data[d->icode].operand[4].mode;
20411 if (VECTOR_MODE_P (modev2))
20412 op0 = safe_vector_operand (op0, modev2);
20413 if (VECTOR_MODE_P (modev3))
20414 op1 = safe_vector_operand (op1, modev3);
20416 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20417 op0 = copy_to_mode_reg (modev2, op0);
20418 if ((optimize && !register_operand (op1, modev3))
20419 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20420 op1 = copy_to_mode_reg (modev3, op1);
20422 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20424 error ("the third argument must be a 8-bit immediate");
20428 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20430 if (optimize || !target
20431 || GET_MODE (target) != tmode0
20432 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20433 target = gen_reg_rtx (tmode0);
20435 scratch1 = gen_reg_rtx (tmode1);
20437 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20439 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20441 if (optimize || !target
20442 || GET_MODE (target) != tmode1
20443 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20444 target = gen_reg_rtx (tmode1);
20446 scratch0 = gen_reg_rtx (tmode0);
20448 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-returning variant: see pcmpestr above.  */
20452 gcc_assert (d->flag);
20454 scratch0 = gen_reg_rtx (tmode0);
20455 scratch1 = gen_reg_rtx (tmode1);
20457 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20467 target = gen_reg_rtx (SImode);
20468 emit_move_insn (target, const0_rtx);
20469 target = gen_rtx_SUBREG (QImode, target, 0);
20472 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20473 gen_rtx_fmt_ee (EQ, QImode,
20474 gen_rtx_REG ((enum machine_mode) d->flag,
20477 return SUBREG_REG (target);
20483 /* Return the integer constant in ARG. Constrain it to be in the range
20484 of the subparts of VEC_TYPE; issue an error if not. */
/* Returns the validated element index (return statements are on elided
   lines of this listing).  Rejects non-constants and out-of-range
   selectors with a user-facing error.  */
20487 get_element_number (tree vec_type, tree arg)
20489 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20491 if (!host_integerp (arg, 1)
20492 || (elt = tree_low_cst (arg, 1), elt > max))
20494 error ("selector must be an integer constant in the range 0..%wi", max);
20501 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20502 ix86_expand_vector_init. We DO have language-level syntax for this, in
20503 the form of (type){ init-list }. Except that since we can't place emms
20504 instructions from inside the compiler, we can't allow the use of MMX
20505 registers unless the user explicitly asks for it. So we do *not* define
20506 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20507 we have builtins invoked by mmintrin.h that gives us license to emit
20508 these sorts of instructions. */
/* TYPE: the vector type being built; EXP: call whose n_elt arguments are
   the element initializers; TARGET: suggested result register.  */
20511 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20513 enum machine_mode tmode = TYPE_MODE (type);
20514 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20515 int i, n_elt = GET_MODE_NUNITS (tmode);
20516 rtvec v = rtvec_alloc (n_elt);
20518 gcc_assert (VECTOR_MODE_P (tmode));
20519 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each initializer and coerce it to the element mode.  */
20521 for (i = 0; i < n_elt; ++i)
20523 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20524 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20527 if (!target || !register_operand (target, tmode))
20528 target = gen_reg_rtx (tmode);
20530 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20534 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20535 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20536 had a language-level syntax for referencing vector elements. */
/* EXP: call whose arg0 is the vector and arg1 the constant element
   selector (validated by get_element_number); TARGET: suggested result
   in the element mode.  */
20539 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20541 enum machine_mode tmode, mode0;
20546 arg0 = CALL_EXPR_ARG (exp, 0);
20547 arg1 = CALL_EXPR_ARG (exp, 1);
20549 op0 = expand_normal (arg0);
20550 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode = element mode, mode0 = whole-vector mode.  */
20552 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20553 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20554 gcc_assert (VECTOR_MODE_P (mode0));
20556 op0 = force_reg (mode0, op0);
20558 if (optimize || !target || !register_operand (target, tmode))
20559 target = gen_reg_rtx (tmode);
20561 ix86_expand_vector_extract (true, target, op0, elt);
20566 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20567 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20568 a language-level syntax for referencing vector elements. */
/* EXP: call whose arg0 is the source vector, arg1 the replacement
   element, arg2 the constant selector.  Returns a fresh copy of arg0
   with the selected element replaced — arg0 itself is not modified.  */
20571 ix86_expand_vec_set_builtin (tree exp)
20573 enum machine_mode tmode, mode1;
20574 tree arg0, arg1, arg2;
20576 rtx op0, op1, target;
20578 arg0 = CALL_EXPR_ARG (exp, 0);
20579 arg1 = CALL_EXPR_ARG (exp, 1);
20580 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode = whole-vector mode, mode1 = element mode.  */
20582 tmode = TYPE_MODE (TREE_TYPE (arg0));
20583 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20584 gcc_assert (VECTOR_MODE_P (tmode));
20586 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20587 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
20588 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Widen/narrow the scalar to the element mode if expansion picked a
   different mode.  */
20590 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20591 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20593 op0 = force_reg (tmode, op0);
20594 op1 = force_reg (mode1, op1);
20596 /* OP0 is the source of these builtin functions and shouldn't be
20597 modified. Create a copy, use it and return it as target. */
20598 target = gen_reg_rtx (tmode);
20599 emit_move_insn (target, op0);
20600 ix86_expand_vector_set (true, target, op1, elt);
20605 /* Expand an expression EXP that calls a built-in function,
20606 with result going to TARGET if that's convenient
20607 (and in mode MODE if that's convenient).
20608 SUBTARGET may be used as the target for computing one of EXP's operands.
20609 IGNORE is nonzero if the value is to be ignored. */
20612 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20613 enum machine_mode mode ATTRIBUTE_UNUSED,
20614 int ignore ATTRIBUTE_UNUSED)
20616 const struct builtin_description *d;
20618 enum insn_code icode;
20619 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20620 tree arg0, arg1, arg2, arg3;
20621 rtx op0, op1, op2, op3, pat;
20622 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20623 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20627 case IX86_BUILTIN_EMMS:
20628 emit_insn (gen_mmx_emms ());
20631 case IX86_BUILTIN_SFENCE:
20632 emit_insn (gen_sse_sfence ());
20635 case IX86_BUILTIN_MASKMOVQ:
20636 case IX86_BUILTIN_MASKMOVDQU:
20637 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20638 ? CODE_FOR_mmx_maskmovq
20639 : CODE_FOR_sse2_maskmovdqu);
20640 /* Note the arg order is different from the operand order. */
20641 arg1 = CALL_EXPR_ARG (exp, 0);
20642 arg2 = CALL_EXPR_ARG (exp, 1);
20643 arg0 = CALL_EXPR_ARG (exp, 2);
20644 op0 = expand_normal (arg0);
20645 op1 = expand_normal (arg1);
20646 op2 = expand_normal (arg2);
20647 mode0 = insn_data[icode].operand[0].mode;
20648 mode1 = insn_data[icode].operand[1].mode;
20649 mode2 = insn_data[icode].operand[2].mode;
20651 op0 = force_reg (Pmode, op0);
20652 op0 = gen_rtx_MEM (mode1, op0);
20654 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20655 op0 = copy_to_mode_reg (mode0, op0);
20656 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20657 op1 = copy_to_mode_reg (mode1, op1);
20658 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20659 op2 = copy_to_mode_reg (mode2, op2);
20660 pat = GEN_FCN (icode) (op0, op1, op2);
20666 case IX86_BUILTIN_RSQRTF:
20667 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20669 case IX86_BUILTIN_SQRTSS:
20670 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20671 case IX86_BUILTIN_RSQRTSS:
20672 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20673 case IX86_BUILTIN_RCPSS:
20674 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20676 case IX86_BUILTIN_LOADUPS:
20677 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20679 case IX86_BUILTIN_STOREUPS:
20680 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20682 case IX86_BUILTIN_LOADHPS:
20683 case IX86_BUILTIN_LOADLPS:
20684 case IX86_BUILTIN_LOADHPD:
20685 case IX86_BUILTIN_LOADLPD:
20686 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20687 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20688 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20689 : CODE_FOR_sse2_loadlpd);
20690 arg0 = CALL_EXPR_ARG (exp, 0);
20691 arg1 = CALL_EXPR_ARG (exp, 1);
20692 op0 = expand_normal (arg0);
20693 op1 = expand_normal (arg1);
20694 tmode = insn_data[icode].operand[0].mode;
20695 mode0 = insn_data[icode].operand[1].mode;
20696 mode1 = insn_data[icode].operand[2].mode;
20698 op0 = force_reg (mode0, op0);
20699 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20700 if (optimize || target == 0
20701 || GET_MODE (target) != tmode
20702 || !register_operand (target, tmode))
20703 target = gen_reg_rtx (tmode);
20704 pat = GEN_FCN (icode) (target, op0, op1);
20710 case IX86_BUILTIN_STOREHPS:
20711 case IX86_BUILTIN_STORELPS:
20712 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20713 : CODE_FOR_sse_storelps);
20714 arg0 = CALL_EXPR_ARG (exp, 0);
20715 arg1 = CALL_EXPR_ARG (exp, 1);
20716 op0 = expand_normal (arg0);
20717 op1 = expand_normal (arg1);
20718 mode0 = insn_data[icode].operand[0].mode;
20719 mode1 = insn_data[icode].operand[1].mode;
20721 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20722 op1 = force_reg (mode1, op1);
20724 pat = GEN_FCN (icode) (op0, op1);
20730 case IX86_BUILTIN_MOVNTPS:
20731 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20732 case IX86_BUILTIN_MOVNTQ:
20733 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20735 case IX86_BUILTIN_LDMXCSR:
20736 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20737 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20738 emit_move_insn (target, op0);
20739 emit_insn (gen_sse_ldmxcsr (target));
20742 case IX86_BUILTIN_STMXCSR:
20743 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20744 emit_insn (gen_sse_stmxcsr (target));
20745 return copy_to_mode_reg (SImode, target);
20747 case IX86_BUILTIN_SHUFPS:
20748 case IX86_BUILTIN_SHUFPD:
20749 icode = (fcode == IX86_BUILTIN_SHUFPS
20750 ? CODE_FOR_sse_shufps
20751 : CODE_FOR_sse2_shufpd);
20752 arg0 = CALL_EXPR_ARG (exp, 0);
20753 arg1 = CALL_EXPR_ARG (exp, 1);
20754 arg2 = CALL_EXPR_ARG (exp, 2);
20755 op0 = expand_normal (arg0);
20756 op1 = expand_normal (arg1);
20757 op2 = expand_normal (arg2);
20758 tmode = insn_data[icode].operand[0].mode;
20759 mode0 = insn_data[icode].operand[1].mode;
20760 mode1 = insn_data[icode].operand[2].mode;
20761 mode2 = insn_data[icode].operand[3].mode;
20763 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20764 op0 = copy_to_mode_reg (mode0, op0);
20765 if ((optimize && !register_operand (op1, mode1))
20766 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
20767 op1 = copy_to_mode_reg (mode1, op1);
20768 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20770 /* @@@ better error message */
20771 error ("mask must be an immediate");
20772 return gen_reg_rtx (tmode);
20774 if (optimize || target == 0
20775 || GET_MODE (target) != tmode
20776 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20777 target = gen_reg_rtx (tmode);
20778 pat = GEN_FCN (icode) (target, op0, op1, op2);
20784 case IX86_BUILTIN_PSHUFW:
20785 case IX86_BUILTIN_PSHUFD:
20786 case IX86_BUILTIN_PSHUFHW:
20787 case IX86_BUILTIN_PSHUFLW:
20788 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20789 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20790 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20791 : CODE_FOR_mmx_pshufw);
20792 arg0 = CALL_EXPR_ARG (exp, 0);
20793 arg1 = CALL_EXPR_ARG (exp, 1);
20794 op0 = expand_normal (arg0);
20795 op1 = expand_normal (arg1);
20796 tmode = insn_data[icode].operand[0].mode;
20797 mode1 = insn_data[icode].operand[1].mode;
20798 mode2 = insn_data[icode].operand[2].mode;
20800 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20801 op0 = copy_to_mode_reg (mode1, op0);
20802 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20804 /* @@@ better error message */
20805 error ("mask must be an immediate");
20809 || GET_MODE (target) != tmode
20810 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20811 target = gen_reg_rtx (tmode);
20812 pat = GEN_FCN (icode) (target, op0, op1);
20818 case IX86_BUILTIN_PSLLW128:
20819 case IX86_BUILTIN_PSLLWI128:
20820 icode = CODE_FOR_ashlv8hi3;
20822 case IX86_BUILTIN_PSLLD128:
20823 case IX86_BUILTIN_PSLLDI128:
20824 icode = CODE_FOR_ashlv4si3;
20826 case IX86_BUILTIN_PSLLQ128:
20827 case IX86_BUILTIN_PSLLQI128:
20828 icode = CODE_FOR_ashlv2di3;
20830 case IX86_BUILTIN_PSRAW128:
20831 case IX86_BUILTIN_PSRAWI128:
20832 icode = CODE_FOR_ashrv8hi3;
20834 case IX86_BUILTIN_PSRAD128:
20835 case IX86_BUILTIN_PSRADI128:
20836 icode = CODE_FOR_ashrv4si3;
20838 case IX86_BUILTIN_PSRLW128:
20839 case IX86_BUILTIN_PSRLWI128:
20840 icode = CODE_FOR_lshrv8hi3;
20842 case IX86_BUILTIN_PSRLD128:
20843 case IX86_BUILTIN_PSRLDI128:
20844 icode = CODE_FOR_lshrv4si3;
20846 case IX86_BUILTIN_PSRLQ128:
20847 case IX86_BUILTIN_PSRLQI128:
20848 icode = CODE_FOR_lshrv2di3;
20851 arg0 = CALL_EXPR_ARG (exp, 0);
20852 arg1 = CALL_EXPR_ARG (exp, 1);
20853 op0 = expand_normal (arg0);
20854 op1 = expand_normal (arg1);
20856 tmode = insn_data[icode].operand[0].mode;
20857 mode1 = insn_data[icode].operand[1].mode;
20859 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20860 op0 = copy_to_reg (op0);
20862 if (!CONST_INT_P (op1))
20863 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
20865 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
20866 op1 = copy_to_reg (op1);
20868 target = gen_reg_rtx (tmode);
20869 pat = GEN_FCN (icode) (target, op0, op1);
20875 case IX86_BUILTIN_PSLLDQI128:
20876 case IX86_BUILTIN_PSRLDQI128:
20877 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
20878 : CODE_FOR_sse2_lshrti3);
20879 arg0 = CALL_EXPR_ARG (exp, 0);
20880 arg1 = CALL_EXPR_ARG (exp, 1);
20881 op0 = expand_normal (arg0);
20882 op1 = expand_normal (arg1);
20883 tmode = insn_data[icode].operand[0].mode;
20884 mode1 = insn_data[icode].operand[1].mode;
20885 mode2 = insn_data[icode].operand[2].mode;
20887 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20889 op0 = copy_to_reg (op0);
20890 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20892 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20894 error ("shift must be an immediate");
20897 target = gen_reg_rtx (V2DImode);
20898 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
20905 case IX86_BUILTIN_FEMMS:
20906 emit_insn (gen_mmx_femms ());
20909 case IX86_BUILTIN_PAVGUSB:
20910 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
20912 case IX86_BUILTIN_PF2ID:
20913 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
20915 case IX86_BUILTIN_PFACC:
20916 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
20918 case IX86_BUILTIN_PFADD:
20919 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
20921 case IX86_BUILTIN_PFCMPEQ:
20922 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
20924 case IX86_BUILTIN_PFCMPGE:
20925 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
20927 case IX86_BUILTIN_PFCMPGT:
20928 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
20930 case IX86_BUILTIN_PFMAX:
20931 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
20933 case IX86_BUILTIN_PFMIN:
20934 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
20936 case IX86_BUILTIN_PFMUL:
20937 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
20939 case IX86_BUILTIN_PFRCP:
20940 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
20942 case IX86_BUILTIN_PFRCPIT1:
20943 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
20945 case IX86_BUILTIN_PFRCPIT2:
20946 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
20948 case IX86_BUILTIN_PFRSQIT1:
20949 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
20951 case IX86_BUILTIN_PFRSQRT:
20952 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
20954 case IX86_BUILTIN_PFSUB:
20955 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
20957 case IX86_BUILTIN_PFSUBR:
20958 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
20960 case IX86_BUILTIN_PI2FD:
20961 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
20963 case IX86_BUILTIN_PMULHRW:
20964 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
20966 case IX86_BUILTIN_PF2IW:
20967 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
20969 case IX86_BUILTIN_PFNACC:
20970 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
20972 case IX86_BUILTIN_PFPNACC:
20973 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
20975 case IX86_BUILTIN_PI2FW:
20976 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
20978 case IX86_BUILTIN_PSWAPDSI:
20979 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
20981 case IX86_BUILTIN_PSWAPDSF:
20982 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
20984 case IX86_BUILTIN_SQRTSD:
20985 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
20986 case IX86_BUILTIN_LOADUPD:
20987 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
20988 case IX86_BUILTIN_STOREUPD:
20989 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
20991 case IX86_BUILTIN_MFENCE:
20992 emit_insn (gen_sse2_mfence ());
20994 case IX86_BUILTIN_LFENCE:
20995 emit_insn (gen_sse2_lfence ());
20998 case IX86_BUILTIN_CLFLUSH:
20999 arg0 = CALL_EXPR_ARG (exp, 0);
21000 op0 = expand_normal (arg0);
21001 icode = CODE_FOR_sse2_clflush;
21002 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21003 op0 = copy_to_mode_reg (Pmode, op0);
21005 emit_insn (gen_sse2_clflush (op0));
21008 case IX86_BUILTIN_MOVNTPD:
21009 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
21010 case IX86_BUILTIN_MOVNTDQ:
21011 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
21012 case IX86_BUILTIN_MOVNTI:
21013 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
21015 case IX86_BUILTIN_LOADDQU:
21016 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
21017 case IX86_BUILTIN_STOREDQU:
21018 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
21020 case IX86_BUILTIN_MONITOR:
21021 arg0 = CALL_EXPR_ARG (exp, 0);
21022 arg1 = CALL_EXPR_ARG (exp, 1);
21023 arg2 = CALL_EXPR_ARG (exp, 2);
21024 op0 = expand_normal (arg0);
21025 op1 = expand_normal (arg1);
21026 op2 = expand_normal (arg2);
21028 op0 = copy_to_mode_reg (Pmode, op0);
21030 op1 = copy_to_mode_reg (SImode, op1);
21032 op2 = copy_to_mode_reg (SImode, op2);
21034 emit_insn (gen_sse3_monitor (op0, op1, op2));
21036 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21039 case IX86_BUILTIN_MWAIT:
21040 arg0 = CALL_EXPR_ARG (exp, 0);
21041 arg1 = CALL_EXPR_ARG (exp, 1);
21042 op0 = expand_normal (arg0);
21043 op1 = expand_normal (arg1);
21045 op0 = copy_to_mode_reg (SImode, op0);
21047 op1 = copy_to_mode_reg (SImode, op1);
21048 emit_insn (gen_sse3_mwait (op0, op1));
21051 case IX86_BUILTIN_LDDQU:
21052 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
21055 case IX86_BUILTIN_PALIGNR:
21056 case IX86_BUILTIN_PALIGNR128:
21057 if (fcode == IX86_BUILTIN_PALIGNR)
21059 icode = CODE_FOR_ssse3_palignrdi;
21064 icode = CODE_FOR_ssse3_palignrti;
21067 arg0 = CALL_EXPR_ARG (exp, 0);
21068 arg1 = CALL_EXPR_ARG (exp, 1);
21069 arg2 = CALL_EXPR_ARG (exp, 2);
21070 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21071 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21072 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21073 tmode = insn_data[icode].operand[0].mode;
21074 mode1 = insn_data[icode].operand[1].mode;
21075 mode2 = insn_data[icode].operand[2].mode;
21076 mode3 = insn_data[icode].operand[3].mode;
21078 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21080 op0 = copy_to_reg (op0);
21081 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21083 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21085 op1 = copy_to_reg (op1);
21086 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21088 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21090 error ("shift must be an immediate");
21093 target = gen_reg_rtx (mode);
21094 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21101 case IX86_BUILTIN_MOVNTDQA:
21102 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21105 case IX86_BUILTIN_MOVNTSD:
21106 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21108 case IX86_BUILTIN_MOVNTSS:
21109 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21111 case IX86_BUILTIN_INSERTQ:
21112 case IX86_BUILTIN_EXTRQ:
21113 icode = (fcode == IX86_BUILTIN_EXTRQ
21114 ? CODE_FOR_sse4a_extrq
21115 : CODE_FOR_sse4a_insertq);
21116 arg0 = CALL_EXPR_ARG (exp, 0);
21117 arg1 = CALL_EXPR_ARG (exp, 1);
21118 op0 = expand_normal (arg0);
21119 op1 = expand_normal (arg1);
21120 tmode = insn_data[icode].operand[0].mode;
21121 mode1 = insn_data[icode].operand[1].mode;
21122 mode2 = insn_data[icode].operand[2].mode;
21123 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21124 op0 = copy_to_mode_reg (mode1, op0);
21125 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21126 op1 = copy_to_mode_reg (mode2, op1);
21127 if (optimize || target == 0
21128 || GET_MODE (target) != tmode
21129 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21130 target = gen_reg_rtx (tmode);
21131 pat = GEN_FCN (icode) (target, op0, op1);
21137 case IX86_BUILTIN_EXTRQI:
21138 icode = CODE_FOR_sse4a_extrqi;
21139 arg0 = CALL_EXPR_ARG (exp, 0);
21140 arg1 = CALL_EXPR_ARG (exp, 1);
21141 arg2 = CALL_EXPR_ARG (exp, 2);
21142 op0 = expand_normal (arg0);
21143 op1 = expand_normal (arg1);
21144 op2 = expand_normal (arg2);
21145 tmode = insn_data[icode].operand[0].mode;
21146 mode1 = insn_data[icode].operand[1].mode;
21147 mode2 = insn_data[icode].operand[2].mode;
21148 mode3 = insn_data[icode].operand[3].mode;
21149 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21150 op0 = copy_to_mode_reg (mode1, op0);
21151 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21153 error ("index mask must be an immediate");
21154 return gen_reg_rtx (tmode);
21156 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21158 error ("length mask must be an immediate");
21159 return gen_reg_rtx (tmode);
21161 if (optimize || target == 0
21162 || GET_MODE (target) != tmode
21163 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21164 target = gen_reg_rtx (tmode);
21165 pat = GEN_FCN (icode) (target, op0, op1, op2);
21171 case IX86_BUILTIN_INSERTQI:
21172 icode = CODE_FOR_sse4a_insertqi;
21173 arg0 = CALL_EXPR_ARG (exp, 0);
21174 arg1 = CALL_EXPR_ARG (exp, 1);
21175 arg2 = CALL_EXPR_ARG (exp, 2);
21176 arg3 = CALL_EXPR_ARG (exp, 3);
21177 op0 = expand_normal (arg0);
21178 op1 = expand_normal (arg1);
21179 op2 = expand_normal (arg2);
21180 op3 = expand_normal (arg3);
21181 tmode = insn_data[icode].operand[0].mode;
21182 mode1 = insn_data[icode].operand[1].mode;
21183 mode2 = insn_data[icode].operand[2].mode;
21184 mode3 = insn_data[icode].operand[3].mode;
21185 mode4 = insn_data[icode].operand[4].mode;
21187 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21188 op0 = copy_to_mode_reg (mode1, op0);
21190 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21191 op1 = copy_to_mode_reg (mode2, op1);
21193 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21195 error ("index mask must be an immediate");
21196 return gen_reg_rtx (tmode);
21198 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21200 error ("length mask must be an immediate");
21201 return gen_reg_rtx (tmode);
21203 if (optimize || target == 0
21204 || GET_MODE (target) != tmode
21205 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21206 target = gen_reg_rtx (tmode);
21207 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21213 case IX86_BUILTIN_VEC_INIT_V2SI:
21214 case IX86_BUILTIN_VEC_INIT_V4HI:
21215 case IX86_BUILTIN_VEC_INIT_V8QI:
21216 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21218 case IX86_BUILTIN_VEC_EXT_V2DF:
21219 case IX86_BUILTIN_VEC_EXT_V2DI:
21220 case IX86_BUILTIN_VEC_EXT_V4SF:
21221 case IX86_BUILTIN_VEC_EXT_V4SI:
21222 case IX86_BUILTIN_VEC_EXT_V8HI:
21223 case IX86_BUILTIN_VEC_EXT_V2SI:
21224 case IX86_BUILTIN_VEC_EXT_V4HI:
21225 case IX86_BUILTIN_VEC_EXT_V16QI:
21226 return ix86_expand_vec_ext_builtin (exp, target);
21228 case IX86_BUILTIN_VEC_SET_V2DI:
21229 case IX86_BUILTIN_VEC_SET_V4SF:
21230 case IX86_BUILTIN_VEC_SET_V4SI:
21231 case IX86_BUILTIN_VEC_SET_V8HI:
21232 case IX86_BUILTIN_VEC_SET_V4HI:
21233 case IX86_BUILTIN_VEC_SET_V16QI:
21234 return ix86_expand_vec_set_builtin (exp);
21236 case IX86_BUILTIN_INFQ:
21238 REAL_VALUE_TYPE inf;
21242 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21244 tmp = validize_mem (force_const_mem (mode, tmp));
21247 target = gen_reg_rtx (mode);
21249 emit_move_insn (target, tmp);
21253 case IX86_BUILTIN_FABSQ:
21254 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21256 case IX86_BUILTIN_COPYSIGNQ:
21257 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21263 for (i = 0, d = bdesc_sse_3arg;
21264 i < ARRAY_SIZE (bdesc_sse_3arg);
21266 if (d->code == fcode)
21267 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21270 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21271 if (d->code == fcode)
21273 /* Compares are treated specially. */
21274 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21275 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21276 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21277 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21278 return ix86_expand_sse_compare (d, exp, target);
21280 return ix86_expand_binop_builtin (d->icode, exp, target);
21283 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21284 if (d->code == fcode)
21285 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21287 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21288 if (d->code == fcode)
21289 return ix86_expand_sse_comi (d, exp, target);
21291 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21292 if (d->code == fcode)
21293 return ix86_expand_sse_ptest (d, exp, target);
21295 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21296 if (d->code == fcode)
21297 return ix86_expand_crc32 (d->icode, exp, target);
21299 for (i = 0, d = bdesc_pcmpestr;
21300 i < ARRAY_SIZE (bdesc_pcmpestr);
21302 if (d->code == fcode)
21303 return ix86_expand_sse_pcmpestr (d, exp, target);
21305 for (i = 0, d = bdesc_pcmpistr;
21306 i < ARRAY_SIZE (bdesc_pcmpistr);
21308 if (d->code == fcode)
21309 return ix86_expand_sse_pcmpistr (d, exp, target);
21311 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21312 if (d->code == fcode)
21313 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21314 (enum multi_arg_type)d->flag,
21317 gcc_unreachable ();
21320 /* Returns a function decl for a vectorized version of the builtin function
21321 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21322 if it is not available. */
/* NOTE(review): this listing is line-sampled; the return type line, the
   'switch (fn)' header, closing braces and the 'break;' / 'return NULL_TREE;'
   statements between cases are elided.  */
21325 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21328 enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are handled here.  */
21331 if (TREE_CODE (type_out) != VECTOR_TYPE
21332 || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element mode and lane count of the result and input vector types.  */
21335 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21336 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21337 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21338 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* sqrt on V2DF -> SQRTPD builtin.  */
21342 case BUILT_IN_SQRT:
21343 if (out_mode == DFmode && out_n == 2
21344 && in_mode == DFmode && in_n == 2)
21345 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
/* sqrtf on V4SF -> SQRTPS_NR builtin.  */
21348 case BUILT_IN_SQRTF:
21349 if (out_mode == SFmode && out_n == 4
21350 && in_mode == SFmode && in_n == 4)
21351 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
/* lrint on V2DF producing V4SI -> pack-to-SFIX builtin.  */
21354 case BUILT_IN_LRINT:
21355 if (out_mode == SImode && out_n == 4
21356 && in_mode == DFmode && in_n == 2)
21357 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
/* lrintf on V4SF producing V4SI -> CVTPS2DQ builtin.  */
21360 case BUILT_IN_LRINTF:
21361 if (out_mode == SImode && out_n == 4
21362 && in_mode == SFmode && in_n == 4)
21363 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21370 /* Dispatch to a handler for a vectorization library. */
21371 if (ix86_veclib_handler)
21372 return (*ix86_veclib_handler)(fn, type_out, type_in);
21377 /* Handler for an ACML-style interface to a library with vectorized
/* NOTE(review): sampled listing — the rest of this header comment, the return
   type, several early 'return NULL_TREE;' paths, the 'switch (fn)' header and
   the lines that patch the ".." in the name template are elided.  */
21381 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: "__vr" + precision/width marker (the "..") + "_" + function
   name, e.g. "__vrd2_sin"; sprintf below writes past the 7-char prefix.  */
21383 char name[20] = "__vr.._";
21384 tree fntype, new_fndecl, args;
21387 enum machine_mode el_mode, in_mode;
21390 /* The ACML is 64bits only and suitable for unsafe math only as
21391 it does not correctly support parts of IEEE with the required
21392 precision such as denormals. */
21394 || !flag_unsafe_math_optimizations)
/* Element mode / lane counts of output and input; they must agree.  */
21397 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21398 n = TYPE_VECTOR_SUBPARTS (type_out);
21399 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21400 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21401 if (el_mode != in_mode
/* Double-precision entry points (V2DF).  */
21411 case BUILT_IN_LOG2:
21412 case BUILT_IN_LOG10:
21415 if (el_mode != DFmode
/* Single-precision entry points (V4SF).  */
21420 case BUILT_IN_SINF:
21421 case BUILT_IN_COSF:
21422 case BUILT_IN_EXPF:
21423 case BUILT_IN_POWF:
21424 case BUILT_IN_LOGF:
21425 case BUILT_IN_LOG2F:
21426 case BUILT_IN_LOG10F:
21429 if (el_mode != SFmode
/* bname+10 skips the "__builtin_" prefix (10 characters) of the builtin's
   identifier when forming the library symbol name.  */
21438 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21439 sprintf (name + 7, "%s", bname+10);
/* Count the builtin's arguments to decide between the unary and binary
   (e.g. pow) vectorized prototypes.  */
21442 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21443 args = TREE_CHAIN (args))
21447 fntype = build_function_type_list (type_out, type_in, NULL);
21449 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21451 /* Build a function declaration for the vectorized function. */
21452 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21453 TREE_PUBLIC (new_fndecl) = 1;
21454 DECL_EXTERNAL (new_fndecl) = 1;
/* NOVOPS + READONLY: the math routine reads/writes no user-visible memory.  */
21455 DECL_IS_NOVOPS (new_fndecl) = 1;
21456 TREE_READONLY (new_fndecl) = 1;
21462 /* Returns a decl of a function that implements conversion of the
21463 input vector of type TYPE, or NULL_TREE if it is not available. */
/* NOTE(review): sampled listing — return type, 'switch (code)' header, the
   FLOAT_EXPR case label, mode case labels and default returns are elided.  */
21466 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21468 if (TREE_CODE (type) != VECTOR_TYPE)
/* int vector -> float vector: CVTDQ2PS.  */
21474 switch (TYPE_MODE (type))
21477 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float vector -> int vector with truncation: CVTTPS2DQ.  */
21482 case FIX_TRUNC_EXPR:
21483 switch (TYPE_MODE (type))
21486 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21496 /* Returns a code for a target-specific builtin that implements
21497 reciprocal of the function, or NULL_TREE if not available. */
21500 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21501 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under the full set of
   fast/unsafe-math flags; otherwise refuse.  */
21503 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21504 && flag_finite_math_only && !flag_trapping_math
21505 && flag_unsafe_math_optimizations))
/* NOTE(review): sampled listing — the 'if (md_fn)' split and both
   'switch (fn)' headers between the branches below are elided.  */
21509 /* Machine dependent builtins. */
21512 /* Vectorized version of sqrt to rsqrt conversion. */
21513 case IX86_BUILTIN_SQRTPS_NR:
21514 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21520 /* Normal builtins. */
21523 /* Sqrt to rsqrt conversion. */
21524 case BUILT_IN_SQRTF:
21525 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21532 /* Store OPERAND to the memory after reload is completed. This means
21533 that we can't easily use assign_stack_local. */
/* NOTE(review): sampled listing — the return type, mode 'switch' headers,
   emit_insn wrappers around the gen_rtx_SET forms, and several braces are
   elided between the surviving lines.  */
21535 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21539 gcc_assert (reload_completed);
/* With a red zone we may store below the stack pointer without
   adjusting it: address is sp - RED_ZONE_SIZE.  */
21540 if (TARGET_RED_ZONE)
21542 result = gen_rtx_MEM (mode,
21543 gen_rtx_PLUS (Pmode,
21545 GEN_INT (-RED_ZONE_SIZE)));
21546 emit_move_insn (result, operand);
/* 64-bit without red zone: push the value as one DImode word.  */
21548 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21554 operand = gen_lowpart (DImode, operand);
21558 gen_rtx_SET (VOIDmode,
21559 gen_rtx_MEM (DImode,
21560 gen_rtx_PRE_DEC (DImode,
21561 stack_pointer_rtx)),
21565 gcc_unreachable ();
21567 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: a DImode value is pushed as two SImode halves,
   high word first (split_di).  */
21576 split_di (&operand, 1, operands, operands + 1);
21578 gen_rtx_SET (VOIDmode,
21579 gen_rtx_MEM (SImode,
21580 gen_rtx_PRE_DEC (Pmode,
21581 stack_pointer_rtx)),
21584 gen_rtx_SET (VOIDmode,
21585 gen_rtx_MEM (SImode,
21586 gen_rtx_PRE_DEC (Pmode,
21587 stack_pointer_rtx)),
21592 /* Store HImodes as SImodes. */
21593 operand = gen_lowpart (SImode, operand)
21597 gen_rtx_SET (VOIDmode,
21598 gen_rtx_MEM (GET_MODE (operand),
21599 gen_rtx_PRE_DEC (SImode,
21600 stack_pointer_rtx)),
21604 gcc_unreachable ();
/* The stored value now lives at the (new) top of stack.  */
21606 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21611 /* Free operand from the memory. */
/* Releases the stack slot pushed by ix86_force_to_memory; a no-op when the
   red zone was used (nothing was pushed).  NOTE(review): sampled listing —
   the size computation between the mode test and the LEA emit is elided.  */
21613 ix86_free_from_memory (enum machine_mode mode)
21615 if (!TARGET_RED_ZONE)
/* DImode (or any mode on 64-bit) occupied a full 8-byte slot.  */
21619 if (mode == DImode || TARGET_64BIT)
21623 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21624 to pop or add instruction if registers are available. */
21625 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21626 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21631 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21632 QImode must go into class Q_REGS.
21633 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21634 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): sampled listing — the return type and several 'return'
   statements (e.g. the returns of regclass/NO_REGS after some tests) are
   elided between the surviving lines.  */
21636 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21638 enum machine_mode mode = GET_MODE (x);
21640 /* We're only allowed to return a subclass of CLASS. Many of the
21641 following checks fail for NO_REGS, so eliminate that early. */
21642 if (regclass == NO_REGS)
21645 /* All classes can load zeros. */
21646 if (x == CONST0_RTX (mode))
21649 /* Force constants into memory if we are loading a (nonzero) constant into
21650 an MMX or SSE register. This is because there are no MMX/SSE instructions
21651 to load from a constant. */
21653 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21656 /* Prefer SSE regs only, if we can use them for math. */
21657 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21658 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21660 /* Floating-point constants need more complex checks. */
21661 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21663 /* General regs can load everything. */
21664 if (reg_class_subset_p (regclass, GENERAL_REGS))
21667 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21668 zero above. We only want to wind up preferring 80387 registers if
21669 we plan on doing computation with them. */
21671 && standard_80387_constant_p (x))
21673 /* Limit class to non-sse. */
21674 if (regclass == FLOAT_SSE_REGS)
21676 if (regclass == FP_TOP_SSE_REGS)
21678 if (regclass == FP_SECOND_SSE_REGS)
21679 return FP_SECOND_REG;
21680 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21687 /* Generally when we see PLUS here, it's the function invariant
21688 (plus soft-fp const_int). Which can only be computed into general
21690 if (GET_CODE (x) == PLUS)
21691 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21693 /* QImode constants are easy to load, but non-constant QImode data
21694 must go into Q_REGS. */
21695 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21697 if (reg_class_subset_p (regclass, Q_REGS))
21699 if (reg_class_subset_p (Q_REGS, regclass))
21707 /* Discourage putting floating-point values in SSE registers unless
21708 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): sampled listing — return type line and a couple of
   'return' statements are elided.  */
21710 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21712 enum machine_mode mode = GET_MODE (x);
21714 /* Restrict the output reload class to the register bank that we are doing
21715 math on. If we would like not to return a subset of CLASS, reject this
21716 alternative: if reload cannot do this, it will still use its choice. */
21717 mode = GET_MODE (x);
21718 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21719 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87-handled FP modes, narrow mixed classes to the pure-x87 subset.  */
21721 if (X87_FLOAT_MODE_P (mode))
21723 if (regclass == FP_TOP_SSE_REGS)
21725 else if (regclass == FP_SECOND_SSE_REGS)
21726 return FP_SECOND_REG;
21728 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21734 /* If we are copying between general and FP registers, we need a memory
21735 location. The same is true for SSE and MMX registers.
21737 To optimize register_move_cost performance, allow inline variant.
21739 The macro can't work reliably when one of the CLASSES is class containing
21740 registers from multiple units (SSE, MMX, integer). We avoid this by never
21741 combining those units in single alternative in the machine description.
21742 Ensure that this constraint holds to avoid unexpected surprises.
21744 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21745 enforce these sanity checks. */
/* NOTE(review): sampled listing — return type, braces and several
   'return true/false' lines are elided between the surviving lines.  */
21748 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21749 enum machine_mode mode, int strict)
/* Mixed-unit classes violate the invariant described above; only an
   assertion failure under STRICT, otherwise tolerated.  */
21751 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21752 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21753 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21754 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21755 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21756 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
21758 gcc_assert (!strict);
/* x87 <-> anything else always goes through memory.  */
21762 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21765 /* ??? This is a lie. We do have moves between mmx/general, and for
21766 mmx/sse2. But by saying we need secondary memory we discourage the
21767 register allocator from using the mmx registers unless needed. */
21768 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21771 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21773 /* SSE1 doesn't have any direct moves from other classes. */
21777 /* If the target says that inter-unit moves are more expensive
21778 than moving through memory, then don't generate them. */
21779 if (!TARGET_INTER_UNIT_MOVES)
21782 /* Between SSE and general, we have moves no larger than word size. */
21783 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line entry point for the SECONDARY_MEMORY_NEEDED macro; forwards
   unchanged to the inline worker inline_secondary_memory_needed.  */
21791 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21792 enum machine_mode mode, int strict)
21794 return inline_secondary_memory_needed (class1, class2, mode, strict);
21797 /* Return true if the registers in CLASS cannot represent the change from
21798 modes FROM to TO. */
/* NOTE(review): sampled listing — return type, an early same-size/equal-mode
   shortcut and the 'return true/false' lines are elided.  */
21801 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
21802 enum reg_class regclass)
21807 /* x87 registers can't do subreg at all, as all values are reformatted
21808 to extended precision. */
21809 if (MAYBE_FLOAT_CLASS_P (regclass))
21812 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21814 /* Vector registers do not support QI or HImode loads. If we don't
21815 disallow a change to these modes, reload will assume it's ok to
21816 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21817 the vec_dupv4hi pattern. */
21818 if (GET_MODE_SIZE (from) < 4)
21821 /* Vector registers do not support subreg with nonzero offsets, which
21822 are otherwise valid for integer registers. Since we can't see
21823 whether we have a nonzero offset from here, prohibit all
21824 nonparadoxical subregs changing size. */
21825 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
21832 /* Return the cost of moving data of mode M between a
21833 register and memory. A value of 2 is the default; this cost is
21834 relative to those in `REGISTER_MOVE_COST'.
21836 This function is used extensively by register_move_cost that is used to
21837 build tables at startup. Make it inline in this case.
21838 When IN is 2, return maximum of in and out move cost.
21840 If moving between registers and memory is more expensive than
21841 between two registers, you should define this macro to express the
21844 Model also increased moving costs of QImode registers in non
/* NOTE(review): sampled listing — the mode-size 'switch' bodies that compute
   'index' for the fp/sse/mmx cost tables, plus several braces and case
   labels, are elided between the surviving lines.  */
21848 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 classes: look up fp_load/fp_store by size index; in == 2 takes the
   worse of the two directions.  */
21852 if (FLOAT_CLASS_P (regclass))
21870 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
21871 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: same scheme with the sse_load/sse_store tables.  */
21873 if (SSE_CLASS_P (regclass))
21876 switch (GET_MODE_SIZE (mode))
21891 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
21892 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: mmx_load/mmx_store tables.  */
21894 if (MMX_CLASS_P (regclass))
21897 switch (GET_MODE_SIZE (mode))
21909 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
21910 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes, keyed on operand size.  */
21912 switch (GET_MODE_SIZE (mode))
/* Byte moves: cheap only from Q_REGS (or in 64-bit mode, any reg).
   Otherwise model the partial-register penalty via movzbl costs.  */
21915 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21918 return ix86_cost->int_store[0];
21919 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
21920 cost = ix86_cost->movzbl_load;
21922 cost = ix86_cost->int_load[0];
21924 return MAX (cost, ix86_cost->int_store[0]);
21930 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
21932 return ix86_cost->movzbl_load;
21934 return ix86_cost->int_store[0] + 4;
21939 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
21940 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
21942 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21943 if (mode == TFmode)
21946 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
21948 cost = ix86_cost->int_load[2];
21950 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
21951 return (cost * (((int) GET_MODE_SIZE (mode)
21952 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line entry point for the MEMORY_MOVE_COST macro; forwards
   unchanged to the inline worker inline_memory_move_cost.  */
21957 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
21959 return inline_memory_move_cost (mode, regclass, in);
21963 /* Return the cost of moving data from a register in class CLASS1 to
21964 one in class CLASS2.
21966 It is not required that the cost always equal 2 when FROM is the same as TO;
21967 on some machines it is expensive to move between registers if they are not
21968 general registers. */
/* NOTE(review): sampled listing — return type, 'int cost' declaration, braces
   and a couple of 'return' lines (e.g. the FP/MMX mode-switch cost and the
   final default return of 2) are elided.  */
21971 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
21972 enum reg_class class2)
21974 /* In case we require secondary memory, compute cost of the store followed
21975 by load. In order to avoid bad register allocation choices, we need
21976 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* strict == 0: called from cost computation, skip the sanity asserts.  */
21978 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* in == 2 requests max(load, store) from inline_memory_move_cost.  */
21982 cost += inline_memory_move_cost (mode, class1, 2);
21983 cost += inline_memory_move_cost (mode, class2, 2);
21985 /* In case of copying from general_purpose_register we may emit multiple
21986 stores followed by single load causing memory size mismatch stall.
21987 Count this as arbitrarily high cost of 20. */
21988 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
21991 /* In the case of FP/MMX moves, the registers actually overlap, and we
21992 have to switch modes in order to treat them differently. */
21993 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21994 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22000 /* Moves between SSE/MMX and integer unit are expensive. */
22001 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22002 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22004 /* ??? By keeping returned value relatively high, we limit the number
22005 of moves between integer and MMX/SSE registers for all targets.
22006 Additionally, high value prevents problem with x86_modes_tieable_p(),
22007 where integer modes in MMX/SSE registers are not tieable
22008 because of missing QImode and HImode moves to, from or between
22009 MMX/SSE registers. */
22010 return MAX (8, ix86_cost->mmxsse_to_integer);
22012 if (MAYBE_FLOAT_CLASS_P (class1))
22013 return ix86_cost->fp_move;
22014 if (MAYBE_SSE_CLASS_P (class1))
22015 return ix86_cost->sse_move;
22016 if (MAYBE_MMX_CLASS_P (class1))
22017 return ix86_cost->mmx_move;
22021 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Target hook HARD_REGNO_MODE_OK: return nonzero if hard register
   REGNO can hold a value of machine mode MODE.  Dispatches on the
   register bank (flags, x87, SSE, MMX, general).  NOTE(review):
   numbered listing; braces/returns on some branches are elided.  */
22024 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22026 /* Flags and only flags can only hold CCmode values. */
22027 if (CC_REGNO_P (regno))
22028 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, CC/random/partial-int modes fit in no other register.  */
22029 if (GET_MODE_CLASS (mode) == MODE_CC
22030 || GET_MODE_CLASS (mode) == MODE_RANDOM
22031 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22033 if (FP_REGNO_P (regno))
22034 return VALID_FP_MODE_P (mode);
22035 if (SSE_REGNO_P (regno))
22037 /* We implement the move patterns for all vector modes into and
22038 out of SSE registers, even when no operation instructions
22040 return (VALID_SSE_REG_MODE (mode)
22041 || VALID_SSE2_REG_MODE (mode)
22042 || VALID_MMX_REG_MODE (mode)
22043 || VALID_MMX_REG_MODE_3DNOW (mode));
22045 if (MMX_REGNO_P (regno))
22047 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22048 so if the register is available at all, then we can move data of
22049 the given mode into or out of it. */
22050 return (VALID_MMX_REG_MODE (mode)
22051 || VALID_MMX_REG_MODE_3DNOW (mode));
/* Remaining registers are the general-purpose ones.  */
22054 if (mode == QImode)
22056 /* Take care for QImode values - they can be in non-QI regs,
22057 but then they do cause partial register stalls. */
22058 if (regno < 4 || TARGET_64BIT)
22060 if (!TARGET_PARTIAL_REG_STALL)
/* During/after reload we must accept what reload chose.  */
22062 return reload_in_progress || reload_completed;
22064 /* We handle both integer and floats in the general purpose registers. */
22065 else if (VALID_INT_MODE_P (mode))
22067 else if (VALID_FP_MODE_P (mode))
22069 else if (VALID_DFP_MODE_P (mode))
22071 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22072 on to use that value in smaller contexts, this can easily force a
22073 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22074 supporting DImode, allow it. */
22075 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22081 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22082 tieable integer mode. */
/* Helper for ix86_modes_tieable_p: is MODE an integer mode that can be
   tied with other integer modes?  NOTE(review): heavily elided listing —
   the switch over modes is missing; only two visible returns remain
   (presumably the QI/HI case and the SI-on-32-bit case — confirm against
   the full source).  */
22085 ix86_tieable_integer_mode_p (enum machine_mode mode)
22094 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22097 return TARGET_64BIT;
22104 /* Return true if MODE1 is accessible in a register that can hold MODE2
22105 without copying. That is, all register classes that can hold MODE2
22106 can also hold MODE1. */
/* Target hook MODES_TIEABLE_P: true if MODE1 is accessible in any
   register that can hold MODE2 without copying, i.e. every class that
   admits MODE2 also admits MODE1.  NOTE(review): numbered listing;
   some braces/returns elided.  */
22109 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22111 if (mode1 == mode2)
22114 if (ix86_tieable_integer_mode_p (mode1)
22115 && ix86_tieable_integer_mode_p (mode2))
22118 /* MODE2 being XFmode implies fp stack or general regs, which means we
22119 can tie any smaller floating point modes to it. Note that we do not
22120 tie this with TFmode. */
22121 if (mode2 == XFmode)
22122 return mode1 == SFmode || mode1 == DFmode;
22124 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22125 that we can tie it with SFmode. */
22126 if (mode2 == DFmode)
22127 return mode1 == SFmode;
22129 /* If MODE2 is only appropriate for an SSE register, then tie with
22130 any other mode acceptable to SSE registers. */
22131 if (GET_MODE_SIZE (mode2) == 16
22132 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22133 return (GET_MODE_SIZE (mode1) == 16
22134 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22136 /* If MODE2 is appropriate for an MMX register, then tie
22137 with any other mode acceptable to MMX registers. */
22138 if (GET_MODE_SIZE (mode2) == 8
22139 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22140 return (GET_MODE_SIZE (mode1) == 8
22141 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22146 /* Compute a (partial) cost for rtx X. Return true if the complete
22147 cost has been computed, and false if subexpressions should be
22148 scanned. In either case, *TOTAL contains the cost result. */
/* Target hook RTX_COSTS: compute a (partial) cost for rtx X, storing the
   result in *TOTAL.  Returns true when the complete cost has been
   computed, false when subexpressions should still be scanned.
   NOTE(review): numbered listing; the switch's case labels and many
   braces are elided — comments mark the visible sections only.

   FIX(review): line 22168 read "|| (!GET_CODE (x) != LABEL_REF".
   GET_CODE never yields 0 (UNKNOWN) for a symbolic constant, so
   "!GET_CODE (x)" was always 0 and the comparison with LABEL_REF was
   vacuously true, making the intended LABEL_REF exclusion dead code.
   The stray '!' is removed, matching the later upstream GCC fix.  */
22151 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22153 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22154 enum machine_mode mode = GET_MODE (x);
/* Symbolic / immediate constants: cost rises with what it takes to
   materialize them (64-bit immediates, PIC references to non-local
   symbols).  */
22162 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22164 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22166 else if (flag_pic && SYMBOLIC_CONST (x)
22168 || (GET_CODE (x) != LABEL_REF
22169 && (GET_CODE (x) != SYMBOL_REF
22170 || !SYMBOL_REF_LOCAL_P (x)))))
/* Floating-point constants.  */
22177 if (mode == VOIDmode)
22180 switch (standard_80387_constant_p (x))
22185 default: /* Other constants */
22190 /* Start with (MEM (SYMBOL_REF)), since that's where
22191 it'll probably end up. Add a penalty for size. */
22192 *total = (COSTS_N_INSNS (1)
22193 + (flag_pic != 0 && !TARGET_64BIT)
22194 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22200 /* The zero extensions is often completely free on x86_64, so make
22201 it as cheap as possible. */
22202 if (TARGET_64BIT && mode == DImode
22203 && GET_MODE (XEXP (x, 0)) == SImode)
22205 else if (TARGET_ZERO_EXTEND_WITH_AND)
22206 *total = ix86_cost->add;
22208 *total = ix86_cost->movzx;
22212 *total = ix86_cost->movsx;
/* Shifts: constant small left shifts may be done via LEA.  */
22216 if (CONST_INT_P (XEXP (x, 1))
22217 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22219 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22222 *total = ix86_cost->add;
22225 if ((value == 2 || value == 3)
22226 && ix86_cost->lea <= ix86_cost->shift_const)
22228 *total = ix86_cost->lea;
/* DImode shifts on 32-bit need a two-instruction (or worse) sequence.  */
22238 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22240 if (CONST_INT_P (XEXP (x, 1)))
22242 if (INTVAL (XEXP (x, 1)) > 32)
22243 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22245 *total = ix86_cost->shift_const * 2;
22249 if (GET_CODE (XEXP (x, 1)) == AND)
22250 *total = ix86_cost->shift_var * 2;
22252 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22257 if (CONST_INT_P (XEXP (x, 1)))
22258 *total = ix86_cost->shift_const;
22260 *total = ix86_cost->shift_var;
/* Multiply: FP cases first, then integer widening-multiply detection.  */
22265 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22267 /* ??? SSE scalar cost should be used here. */
22268 *total = ix86_cost->fmul;
22271 else if (X87_FLOAT_MODE_P (mode))
22273 *total = ix86_cost->fmul;
22276 else if (FLOAT_MODE_P (mode))
22278 /* ??? SSE vector cost should be used here. */
22279 *total = ix86_cost->fmul;
22284 rtx op0 = XEXP (x, 0);
22285 rtx op1 = XEXP (x, 1);
22287 if (CONST_INT_P (XEXP (x, 1)))
/* Population count of the constant multiplier drives the per-bit cost.  */
22289 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22290 for (nbits = 0; value != 0; value &= value - 1)
22294 /* This is arbitrary. */
22297 /* Compute costs correctly for widening multiplication. */
22298 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22299 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22300 == GET_MODE_SIZE (mode))
22302 int is_mulwiden = 0;
22303 enum machine_mode inner_mode = GET_MODE (op0);
22305 if (GET_CODE (op0) == GET_CODE (op1))
22306 is_mulwiden = 1, op1 = XEXP (op1, 0);
22307 else if (CONST_INT_P (op1))
22309 if (GET_CODE (op0) == SIGN_EXTEND)
22310 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22313 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* Widening multiply: cost it in the narrower inner mode.  */
22317 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22320 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22321 + nbits * ix86_cost->mult_bit
22322 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
/* Division / modulus.  */
22331 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22332 /* ??? SSE cost should be used here. */
22333 *total = ix86_cost->fdiv;
22334 else if (X87_FLOAT_MODE_P (mode))
22335 *total = ix86_cost->fdiv;
22336 else if (FLOAT_MODE_P (mode))
22337 /* ??? SSE vector cost should be used here. */
22338 *total = ix86_cost->fdiv;
22340 *total = ix86_cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize address-like forms that a single LEA can compute.  */
22344 if (GET_MODE_CLASS (mode) == MODE_INT
22345 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22347 if (GET_CODE (XEXP (x, 0)) == PLUS
22348 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22349 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22350 && CONSTANT_P (XEXP (x, 1)))
22352 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22353 if (val == 2 || val == 4 || val == 8)
22355 *total = ix86_cost->lea;
22356 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22357 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22359 *total += rtx_cost (XEXP (x, 1), outer_code);
22363 else if (GET_CODE (XEXP (x, 0)) == MULT
22364 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22366 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22367 if (val == 2 || val == 4 || val == 8)
22369 *total = ix86_cost->lea;
22370 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22371 *total += rtx_cost (XEXP (x, 1), outer_code);
22375 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22377 *total = ix86_cost->lea;
22378 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22379 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22380 *total += rtx_cost (XEXP (x, 1), outer_code);
/* Add/subtract (FP cases), then 32-bit DImode double-word arithmetic.  */
22387 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22389 /* ??? SSE cost should be used here. */
22390 *total = ix86_cost->fadd;
22393 else if (X87_FLOAT_MODE_P (mode))
22395 *total = ix86_cost->fadd;
22398 else if (FLOAT_MODE_P (mode))
22400 /* ??? SSE vector cost should be used here. */
22401 *total = ix86_cost->fadd;
22409 if (!TARGET_64BIT && mode == DImode)
22411 *total = (ix86_cost->add * 2
22412 + (rtx_cost (XEXP (x, 0), outer_code)
22413 << (GET_MODE (XEXP (x, 0)) != DImode))
22414 + (rtx_cost (XEXP (x, 1), outer_code)
22415 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* Negation.  */
22421 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22423 /* ??? SSE cost should be used here. */
22424 *total = ix86_cost->fchs;
22427 else if (X87_FLOAT_MODE_P (mode))
22429 *total = ix86_cost->fchs;
22432 else if (FLOAT_MODE_P (mode))
22434 /* ??? SSE vector cost should be used here. */
22435 *total = ix86_cost->fchs;
22441 if (!TARGET_64BIT && mode == DImode)
22442 *total = ix86_cost->add * 2;
22444 *total = ix86_cost->add;
/* Single-bit test: (compare (zero_extract x 1 pos) 0) maps to test[bwl].  */
22448 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22449 && XEXP (XEXP (x, 0), 1) == const1_rtx
22450 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22451 && XEXP (x, 1) == const0_rtx)
22453 /* This kind of construct is implemented using test[bwl].
22454 Treat it as if we had an AND. */
22455 *total = (ix86_cost->add
22456 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22457 + rtx_cost (const1_rtx, outer_code));
22463 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS and SQRT.  */
22468 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22469 /* ??? SSE cost should be used here. */
22470 *total = ix86_cost->fabs;
22471 else if (X87_FLOAT_MODE_P (mode))
22472 *total = ix86_cost->fabs;
22473 else if (FLOAT_MODE_P (mode))
22474 /* ??? SSE vector cost should be used here. */
22475 *total = ix86_cost->fabs;
22479 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22480 /* ??? SSE cost should be used here. */
22481 *total = ix86_cost->fsqrt;
22482 else if (X87_FLOAT_MODE_P (mode))
22483 *total = ix86_cost->fsqrt;
22484 else if (FLOAT_MODE_P (mode))
22485 /* ??? SSE vector cost should be used here. */
22486 *total = ix86_cost->fsqrt;
/* UNSPEC: thread-pointer access is cheap.  */
22490 if (XINT (x, 1) == UNSPEC_TP)
22501 static int current_machopic_label_num;
22503 /* Given a symbol name and its associated stub, write out the
22504 definition of the stub. */
/* Darwin/Mach-O only: write the assembly definition of the lazy-binding
   stub for symbol SYMB (stub name STUB) to FILE.  Emits the stub body,
   the binder, and the lazy pointer.  32-bit only (asserted below).
   NOTE(review): numbered listing; the MACHOPIC_PURE/#if alternatives'
   surrounding directives are elided, so both PIC and non-PIC emission
   paths appear flattened here.  */
22507 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22509 unsigned int length;
22510 char *binder_name, *symbol_name, lazy_ptr_name[32];
22511 int label = ++current_machopic_label_num;
22513 /* For 64-bit we shouldn't get here. */
22514 gcc_assert (!TARGET_64BIT);
22516 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22517 symb = (*targetm.strip_name_encoding) (symb);
22519 length = strlen (stub);
22520 binder_name = alloca (length + 32);
22521 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22523 length = strlen (symb);
22524 symbol_name = alloca (length + 32);
22525 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22527 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section (PIC vs non-PIC variants).  */
22530 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22532 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22534 fprintf (file, "%s:\n", stub);
22535 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: compute our own address, then jump through the lazy ptr.  */
22539 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22540 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22541 fprintf (file, "\tjmp\t*%%edx\n");
22544 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer's address and enter dyld.  */
22546 fprintf (file, "%s:\n", binder_name);
22550 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22551 fprintf (file, "\tpushl\t%%eax\n");
22554 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22556 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder.  */
22558 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22559 fprintf (file, "%s:\n", lazy_ptr_name);
22560 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22561 fprintf (file, "\t.long %s\n", binder_name);
/* Darwin-specific TARGET_ASM_FILE_END hook: delegate to the generic
   Darwin end-of-file handling.  NOTE(review): the "void" return-type
   line and braces are elided in this listing.  */
22565 darwin_x86_file_end (void)
22567 darwin_file_end ();
22570 #endif /* TARGET_MACHO */
22572 /* Order the registers for register allocator. */
/* Fill reg_alloc_order[] with the preferred allocation order for the
   local register allocator: call-clobbered GPRs first, then call-saved
   GPRs, then x87/SSE/MMX banks (x87 first only when it is the active
   FP unit).  NOTE(review): numbered listing; declarations of i/pos and
   braces are elided.  */
22575 x86_order_regs_for_local_alloc (void)
22580 /* First allocate the local general purpose registers. */
22581 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22582 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22583 reg_alloc_order [pos++] = i;
22585 /* Global general purpose registers. */
22586 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22587 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22588 reg_alloc_order [pos++] = i;
22590 /* x87 registers come first in case we are doing FP math
22592 if (!TARGET_SSE_MATH)
22593 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22594 reg_alloc_order [pos++] = i;
22596 /* SSE registers. */
22597 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22598 reg_alloc_order [pos++] = i;
22599 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22600 reg_alloc_order [pos++] = i;
22602 /* x87 registers. */
22603 if (TARGET_SSE_MATH)
22604 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22605 reg_alloc_order [pos++] = i;
22607 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22608 reg_alloc_order [pos++] = i;
22610 /* Initialize the rest of array as we do not allocate some registers
22612 while (pos < FIRST_PSEUDO_REGISTER)
22613 reg_alloc_order [pos++] = 0;
22616 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22617 struct attribute_spec.handler. */
/* Attribute handler for "ms_struct"/"gcc_struct": validates that the
   attribute is applied to a struct/union type and that the two mutually
   exclusive attributes are not combined; sets *NO_ADD_ATTRS to reject.
   Returns NULL_TREE (return statements elided in this listing).  */
22619 ix86_handle_struct_attribute (tree *node, tree name,
22620 tree args ATTRIBUTE_UNUSED,
22621 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* For a TYPE_DECL, look through to the underlying type.  */
22624 if (DECL_P (*node))
22626 if (TREE_CODE (*node) == TYPE_DECL)
22627 type = &TREE_TYPE (*node);
/* Only record/union types may carry these attributes.  */
22632 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22633 || TREE_CODE (*type) == UNION_TYPE)))
22635 warning (OPT_Wattributes, "%qs attribute ignored",
22636 IDENTIFIER_POINTER (name));
22637 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive.  */
22640 else if ((is_attribute_p ("ms_struct", name)
22641 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22642 || ((is_attribute_p ("gcc_struct", name)
22643 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22645 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22646 IDENTIFIER_POINTER (name));
22647 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use Microsoft bitfield layout:
   either the target default requests it (and "gcc_struct" does not
   override), or the type carries the "ms_struct" attribute.  */
22654 ix86_ms_bitfield_layout_p (const_tree record_type)
22656 return (TARGET_MS_BITFIELD_LAYOUT &&
22657 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22658 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22661 /* Returns an expression indicating where the this parameter is
22662 located on entry to the FUNCTION. */
/* Return an rtx for the location of the `this' parameter on entry to
   FUNCTION: a register under 64-bit and regparm conventions, otherwise
   a stack slot.  AGGR accounts for a hidden aggregate-return pointer
   occupying the first slot/register.  NOTE(review): numbered listing;
   the TARGET_64BIT conditional and some braces are elided.  */
22665 x86_this_parameter (tree function)
22667 tree type = TREE_TYPE (function);
22668 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
/* 64-bit: `this' is the first (or second, after a hidden return
   pointer) integer parameter register; the register set depends on
   the ABI in use.  */
22672 const int *parm_regs;
22674 if (TARGET_64BIT_MS_ABI)
22675 parm_regs = x86_64_ms_abi_int_parameter_registers;
22677 parm_regs = x86_64_int_parameter_registers;
22678 return gen_rtx_REG (DImode, parm_regs[aggr]);
/* 32-bit regparm/fastcall: `this' arrives in a register.  */
22681 if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
22683 int regno = AX_REG;
22684 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22686 return gen_rtx_REG (SImode, regno);
/* Otherwise `this' is on the stack, past the return address (and the
   hidden aggregate-return pointer, if any).  */
22689 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22692 /* Determine whether x86_output_mi_thunk can succeed. */
/* Determine whether x86_output_mi_thunk can emit the requested thunk:
   always on 64-bit; on 32-bit only if a scratch register is available
   for VCALL_OFFSET / GOT references.  NOTE(review): numbered listing;
   the return statements between the visible tests are elided.  */
22695 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22696 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22697 HOST_WIDE_INT vcall_offset, const_tree function)
22699 /* 64-bit can handle anything. */
22703 /* For 32-bit, everything's fine if we have one free register. */
22704 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22707 /* Need a free register for vcall_offset. */
22711 /* Need a free register for GOT references. */
22712 if (flag_pic && !(*targetm.binds_local_p) (function))
22715 /* Otherwise ok. */
22719 /* Output the assembler code for a thunk function. THUNK_DECL is the
22720 declaration for the thunk function itself, FUNCTION is the decl for
22721 the target function. DELTA is an immediate constant offset to be
22722 added to THIS. If VCALL_OFFSET is nonzero, the word at
22723 *(*this + vcall_offset) should be added to THIS. */
/* Emit the assembly for a "this"-adjusting thunk: add DELTA to THIS,
   optionally add *(*this + VCALL_OFFSET), then tail-jump to FUNCTION.
   NOTE(review): numbered listing; the TARGET_64BIT/#if TARGET_MACHO
   conditionals and several braces are elided, so the 64-bit, Darwin
   and 32-bit PIC jump sequences appear flattened below.  */
22726 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
22727 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
22728 HOST_WIDE_INT vcall_offset, tree function)
22731 rtx this_param = x86_this_parameter (function);
22734 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22735 pull it in now and let DELTA benefit. */
22736 if (REG_P (this_param))
22737 this_reg = this_param;
22738 else if (vcall_offset)
22740 /* Put the this parameter into %eax. */
22741 xops[0] = this_param;
22742 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
22743 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22746 this_reg = NULL_RTX;
22748 /* Adjust the this parameter by a fixed constant. */
22751 xops[0] = GEN_INT (delta);
22752 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: DELTA may not fit an immediate; stage it in r10.  */
22755 if (!x86_64_general_operand (xops[0], DImode))
22757 tmp = gen_rtx_REG (DImode, R10_REG);
22759 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
22761 xops[1] = this_param;
22763 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22766 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22769 /* Adjust the this parameter by a value stored in the vtable. */
22773 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit scratch register: ecx, or eax when fastcall uses ecx.  */
22776 int tmp_regno = CX_REG;
22777 if (lookup_attribute ("fastcall",
22778 TYPE_ATTRIBUTES (TREE_TYPE (function))))
22779 tmp_regno = AX_REG;
22780 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
22783 xops[0] = gen_rtx_MEM (Pmode, this_reg);
22786 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22788 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22790 /* Adjust the this parameter. */
22791 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
22792 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
22794 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
22795 xops[0] = GEN_INT (vcall_offset);
22797 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22798 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
22800 xops[1] = this_reg;
22802 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22804 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22807 /* If necessary, drop THIS back to its stack slot. */
22808 if (this_reg && this_reg != this_param)
22810 xops[0] = this_reg;
22811 xops[1] = this_param;
22812 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the target function.  */
22815 xops[0] = XEXP (DECL_RTL (function), 0);
22818 if (!flag_pic || (*targetm.binds_local_p) (function))
22819 output_asm_insn ("jmp\t%P0", xops);
22820 /* All thunks should be in the same object as their target,
22821 and thus binds_local_p should be true. */
22822 else if (TARGET_64BIT_MS_ABI)
22823 gcc_unreachable ();
/* 64-bit PIC: jump through the GOT entry.  */
22826 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
22827 tmp = gen_rtx_CONST (Pmode, tmp);
22828 tmp = gen_rtx_MEM (QImode, tmp);
22830 output_asm_insn ("jmp\t%A0", xops);
22835 if (!flag_pic || (*targetm.binds_local_p) (function))
22836 output_asm_insn ("jmp\t%P0", xops);
/* Darwin: jump through the machopic stub.  */
22841 rtx sym_ref = XEXP (DECL_RTL (function), 0);
22842 tmp = (gen_rtx_SYMBOL_REF
22844 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
22845 tmp = gen_rtx_MEM (QImode, tmp);
22847 output_asm_insn ("jmp\t%0", xops);
22850 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: materialize the GOT pointer, jump via @GOT.  */
22852 tmp = gen_rtx_REG (SImode, CX_REG);
22853 output_set_got (tmp, NULL_RTX);
22856 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
22857 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: default prologue plus x86-specific
   directives (.version, __fltused, Intel-syntax selection).
   NOTE(review): the darwin_file_start call is presumably guarded by
   TARGET_MACHO in the elided lines — confirm against full source.  */
22863 x86_file_start (void)
22865 default_file_start ();
22867 darwin_file_start ();
22869 if (X86_FILE_START_VERSION_DIRECTIVE)
22870 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22871 if (X86_FILE_START_FLTUSED)
22872 fputs ("\t.global\t__fltused\n", asm_out_file);
22873 if (ix86_asm_dialect == ASM_INTEL)
22874 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: cap the alignment of scalar/complex-int
   and double fields at 32 bits on 32-bit targets without -malign-double
   (i386 ABI compatibility).  Returns COMPUTED unchanged otherwise
   (that return is elided in this listing).  */
22878 x86_field_alignment (tree field, int computed)
22880 enum machine_mode mode;
22881 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural alignment.  */
22883 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type's mode decides.  */
22885 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
22886 ? get_inner_array_type (type) : type);
22887 if (mode == DFmode || mode == DCmode
22888 || GET_MODE_CLASS (mode) == MODE_INT
22889 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22890 return MIN (32, computed);
22894 /* Output assembler code to FILE to increment profiler label # LABELNO
22895 for profiling a function entry. */
/* FUNCTION_PROFILER worker: emit the mcount call for -pg, with
   variants for 64-bit, 32-bit PIC, and plain 32-bit.  NOTE(review):
   the TARGET_64BIT / flag_pic dispatch and #endif lines are elided
   in this listing; the three sequences appear flattened.  */
22897 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit sequence.  */
22901 #ifndef NO_PROFILE_COUNTERS
22902 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
22905 if (!TARGET_64BIT_MS_ABI && flag_pic)
22906 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
22908 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC sequence: go through the GOT.  */
22912 #ifndef NO_PROFILE_COUNTERS
22913 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22914 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
22916 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* Plain 32-bit sequence.  */
22920 #ifndef NO_PROFILE_COUNTERS
22921 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
22922 PROFILE_COUNT_REGISTER);
22924 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22928 /* We don't have exact information about the insn sizes, but we may assume
22929 quite safely that we are informed about all 1 byte insns and memory
22930 address sizes. This is enough to eliminate unnecessary padding in
/* Estimate a lower bound on the encoded size of INSN in bytes, used by
   the jump-mispredict padding pass below.  Exact sizes are unknown;
   only 1-byte insns and address sizes are assumed reliable.
   NOTE(review): numbered listing; the concrete return values for each
   branch are elided.  */
22934 min_insn_size (rtx insn)
22938 if (!INSN_P (insn) || !active_insn_p (insn))
22941 /* Discard alignments we've emit and jump instructions. */
22942 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22943 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22946 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
22947 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
22950 /* Important case - calls are always 5 bytes.
22951 It is common to have many calls in the row. */
22953 && symbolic_reference_mentioned_p (PATTERN (insn))
22954 && !SIBLING_CALL_P (insn))
22956 if (get_attr_length (insn) <= 1)
22959 /* For normal instructions we may rely on the sizes of addresses
22960 and the presence of symbol to require 4 bytes of encoding.
22961 This is not the case for jumps where references are PC relative. */
22962 if (!JUMP_P (insn))
22964 l = get_attr_length_address (insn);
22965 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22974 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* K8 workaround pass: the AMD K8 mispredicts when more than 3 jumps
   land in one aligned 16-byte window.  Scan the insn stream with a
   sliding window and insert alignment padding before the 4th jump when
   the window could fit in 16 bytes.  NOTE(review): numbered listing;
   loop braces, the isjump bookkeeping and njumps increments are
   partially elided.  */
22978 ix86_avoid_jump_misspredicts (void)
22980 rtx insn, start = get_insns ();
22981 int nbytes = 0, njumps = 0;
22984 /* Look for all minimal intervals of instructions containing 4 jumps.
22985 The intervals are bounded by START and INSN. NBYTES is the total
22986 size of instructions in the interval including INSN and not including
22987 START. When the NBYTES is smaller than 16 bytes, it is possible
22988 that the end of START and INSN ends up in the same 16byte page.
22990 The smallest offset in the page INSN can start is the case where START
22991 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22992 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
22994 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22997 nbytes += min_insn_size (insn);
22999 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23000 INSN_UID (insn), min_insn_size (insn));
/* Count real (non-table) jumps entering the window.  */
23002 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23003 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front while it holds 4 jumps.  */
23011 start = NEXT_INSN (start);
23012 if ((JUMP_P (start)
23013 && GET_CODE (PATTERN (start)) != ADDR_VEC
23014 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23016 njumps--, isjump = 1;
23019 nbytes -= min_insn_size (start);
23021 gcc_assert (njumps >= 0);
23023 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23024 INSN_UID (start), INSN_UID (insn), nbytes);
23026 if (njumps == 3 && isjump && nbytes < 16)
23028 int padsize = 15 - nbytes + min_insn_size (insn);
23031 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23032 INSN_UID (insn), padsize);
23033 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23038 /* AMD Athlon works faster
23039 when RET is not destination of conditional jump or directly preceded
23040 by other jump instruction. We avoid the penalty by inserting NOP just
23041 before the RET instructions in such cases. */
/* Athlon/K8 workaround pass: a RET that is the target of a conditional
   jump, or that directly follows another jump, is penalized.  Replace
   such RETs in hot blocks with the long-form return.  NOTE(review):
   numbered listing; `continue`s, braces and the final replacement of
   the ret insn are elided.  */
23043 ix86_pad_returns (void)
23048 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23050 basic_block bb = e->src;
23051 rtx ret = BB_END (bb);
23053 bool replace = false;
23055 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23056 || !maybe_hot_bb_p (bb))
/* Find the last active insn (or label) before the return.  */
23058 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23059 if (active_insn_p (prev) || LABEL_P (prev))
/* Label immediately before RET: replace if any predecessor reaches
   it by a (non-fallthru) jump.  */
23061 if (prev && LABEL_P (prev))
23066 FOR_EACH_EDGE (e, ei, bb->preds)
23067 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23068 && !(e->flags & EDGE_FALLTHRU))
/* Conditional jump directly preceding RET also triggers the penalty.  */
23073 prev = prev_active_insn (ret);
23075 && ((JUMP_P (prev) && any_condjump_p (prev))
23078 /* Empty functions get branch mispredict even when the jump destination
23079 is not visible to us. */
23080 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23085 emit_insn_before (gen_return_internal_long (), ret);
23091 /* Implement machine specific optimizations. We implement padding of returns
23092 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* Body of the machine-specific reorg pass (the function header line is
   elided in this listing — presumably ix86_reorg, confirm against full
   source): run the two AMD workaround passes above when optimizing for
   speed.  */
23096 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23097 ix86_pad_returns ();
23098 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23099 ix86_avoid_jump_misspredicts ();
23102 /* Return nonzero when QImode register that must be represented via REX prefix
/* Return nonzero when INSN uses a QImode register that requires a REX
   prefix (regno >= 4, i.e. not a/b/c/d).  NOTE(review): the returns
   for the found/not-found cases are elided in this listing.  */
23105 x86_extended_QIreg_mentioned_p (rtx insn)
23108 extract_insn_cached (insn);
23109 for (i = 0; i < recog_data.n_operands; i++)
23110 if (REG_P (recog_data.operand[i])
23111 && REGNO (recog_data.operand[i]) >= 4)
23116 /* Return nonzero when P points to register encoded via REX prefix.
23117 Called via for_each_rtx. */
/* for_each_rtx callback: nonzero when *P is a register encoded via a
   REX prefix (r8-r15 or xmm8-xmm15).  NOTE(review): the REG_P guard on
   *P is elided in this listing.  */
23119 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23121 unsigned int regno;
23124 regno = REGNO (*p);
23125 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23128 /* Return true when INSN mentions register that must be encoded using REX
/* Return true when INSN's pattern mentions any register that needs a
   REX prefix; walks the pattern with extended_reg_mentioned_1.  */
23131 x86_extended_reg_mentioned_p (rtx insn)
23133 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23136 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23137 optabs would emit if we didn't have TFmode patterns. */
/* Expand an unsigned SImode/DImode -> FP conversion without TFmode:
   if the input is non-negative, use the ordinary signed conversion;
   otherwise halve it (keeping the low bit folded in for correct
   rounding), convert, and double the result.  */
23140 x86_emit_floatuns (rtx operands[2])
23142 rtx neglab, donelab, i0, i1, f0, in, out;
23143 enum machine_mode mode, inmode;
23145 inmode = GET_MODE (operands[1]);
23146 gcc_assert (inmode == SImode || inmode == DImode);
23149 in = force_reg (inmode, operands[1]);
23150 mode = GET_MODE (out);
23151 neglab = gen_label_rtx ();
23152 donelab = gen_label_rtx ();
23153 f0 = gen_reg_rtx (mode);
/* Fast path: value fits the signed conversion.  */
23155 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23157 expand_float (out, in, 0);
23159 emit_jump_insn (gen_jump (donelab));
23162 emit_label (neglab);
/* Negative (as signed) path: (in >> 1) | (in & 1), convert, double.  */
23164 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23166 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23168 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23170 expand_float (f0, i0, 0);
23172 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23174 emit_label (donelab);
23177 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23178 with all elements equal to VAR. Return true if successful. */
/* Store into TARGET a MODE vector with all elements equal to VAL;
   return true on success.  Dispatches on MODE: direct vec_duplicate
   for natively supported modes, SSE2 unpack/shuffle sequences for
   V8HI/V16QI, and a widen-and-recurse fallback.  NOTE(review):
   numbered listing — the switch's case labels, some declarations
   (x, tmp1, tmp2) and returns are elided; comments mark only the
   visible sections.  */
23181 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23182 rtx target, rtx val)
23184 enum machine_mode smode, wsmode, wvmode;
/* Directly supported modes: a single vec_duplicate insn.  */
23199 val = force_reg (GET_MODE_INNER (mode), val);
23200 x = gen_rtx_VEC_DUPLICATE (mode, val);
23201 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V4HI-style duplicate via truncate when SSE/3DNow!A is available.  */
23207 if (TARGET_SSE || TARGET_3DNOW_A)
23209 val = gen_lowpart (SImode, val);
23210 x = gen_rtx_TRUNCATE (HImode, val);
23211 x = gen_rtx_VEC_DUPLICATE (mode, x);
23212 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V8HImode broadcast via punpcklwd + pshufd.  */
23234 /* Extend HImode to SImode using a paradoxical SUBREG. */
23235 tmp1 = gen_reg_rtx (SImode);
23236 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23237 /* Insert the SImode value as low element of V4SImode vector. */
23238 tmp2 = gen_reg_rtx (V4SImode);
23239 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23240 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23241 CONST0_RTX (V4SImode),
23243 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23244 /* Cast the V4SImode vector back to a V8HImode vector. */
23245 tmp1 = gen_reg_rtx (V8HImode);
23246 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23247 /* Duplicate the low short through the whole low SImode word. */
23248 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23249 /* Cast the V8HImode vector back to a V4SImode vector. */
23250 tmp2 = gen_reg_rtx (V4SImode);
23251 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23252 /* Replicate the low element of the V4SImode vector. */
23253 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23254 /* Cast the V2SImode back to V8HImode, and store in target. */
23255 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
/* V16QImode broadcast via two punpcklbw + pshufd.  */
23266 /* Extend QImode to SImode using a paradoxical SUBREG. */
23267 tmp1 = gen_reg_rtx (SImode);
23268 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23269 /* Insert the SImode value as low element of V4SImode vector. */
23270 tmp2 = gen_reg_rtx (V4SImode);
23271 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23272 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23273 CONST0_RTX (V4SImode),
23275 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23276 /* Cast the V4SImode vector back to a V16QImode vector. */
23277 tmp1 = gen_reg_rtx (V16QImode);
23278 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23279 /* Duplicate the low byte through the whole low SImode word. */
23280 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23281 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23282 /* Cast the V16QImode vector back to a V4SImode vector. */
23283 tmp2 = gen_reg_rtx (V4SImode);
23284 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23285 /* Replicate the low element of the V4SImode vector. */
23286 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23287 /* Cast the V2SImode back to V16QImode, and store in target. */
23288 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
/* Fallback: widen the scalar and recurse in the wider vector mode.  */
23296 /* Replicate the value once into the next wider mode and recurse. */
23297 val = convert_modes (wsmode, smode, val, true);
23298 x = expand_simple_binop (wsmode, ASHIFT, val,
23299 GEN_INT (GET_MODE_BITSIZE (smode)),
23300 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23301 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23303 x = gen_reg_rtx (wvmode);
23304 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23305 gcc_unreachable ();
23306 emit_move_insn (target, gen_lowpart (mode, x));
23314 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23315 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): return value is used as a bool by ix86_expand_vector_init;
   the failure paths are elided from this listing -- confirm in full source. */
23319 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23320 rtx target, rtx var, int one_var)
23322 enum machine_mode vsimode;
/* Two-element case: concat VAR with a zero of the inner mode. */
23338 var = force_reg (GET_MODE_INNER (mode), var);
23339 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23340 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Work in a fresh pseudo unless TARGET is already a pseudo register. */
23345 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23346 new_target = gen_reg_rtx (mode);
23348 new_target = target;
/* Broadcast VAR and merge it with zero, keeping only element 0. */
23349 var = force_reg (GET_MODE_INNER (mode), var);
23350 x = gen_rtx_VEC_DUPLICATE (mode, var);
23351 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23352 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23355 /* We need to shuffle the value to the correct position, so
23356 create a new pseudo to store the intermediate result. */
23358 /* With SSE2, we can use the integer shuffle insns. */
23359 if (mode != V4SFmode && TARGET_SSE2)
23361 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23363 GEN_INT (one_var == 1 ? 0 : 1),
23364 GEN_INT (one_var == 2 ? 0 : 1),
23365 GEN_INT (one_var == 3 ? 0 : 1)));
23366 if (target != new_target)
23367 emit_move_insn (target, new_target);
23371 /* Otherwise convert the intermediate result to V4SFmode and
23372 use the SSE1 shuffle instructions. */
23373 if (mode != V4SFmode)
23375 tmp = gen_reg_rtx (V4SFmode);
23376 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23381 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23383 GEN_INT (one_var == 1 ? 0 : 1),
23384 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23385 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23387 if (mode != V4SFmode)
23388 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23389 else if (tmp != target)
23390 emit_move_insn (target, tmp);
23392 else if (target != new_target)
23393 emit_move_insn (target, new_target);
/* Narrow element modes: widen the element to SImode, build the value in
   an SImode vector of matching width, then lowpart-cast into TARGET. */
23398 vsimode = V4SImode;
23404 vsimode = V2SImode;
23410 /* Zero extend the variable element to SImode and recurse. */
23411 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23413 x = gen_reg_rtx (vsimode);
23414 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23416 gcc_unreachable ();
23418 emit_move_insn (target, gen_lowpart (mode, x));
23426 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23427 consisting of the values in VALS. It is known that all elements
23428 except ONE_VAR are constants. Return true if successful. */
23431 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23432 rtx target, rtx vals, int one_var)
23434 rtx var = XVECEXP (vals, 0, one_var);
23435 enum machine_mode wmode;
/* Build the constant image of the vector with the variable slot zeroed. */
23438 const_vec = copy_rtx (vals);
23439 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23440 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23448 /* For the two element vectors, it's just as easy to use
23449 the general case. */
23465 /* There's no way to set one QImode entry easily. Combine
23466 the variable value with its adjacent constant value, and
23467 promote to an HImode set. */
/* one_var ^ 1 is the constant element sharing the HImode slot with VAR. */
23468 x = XVECEXP (vals, 0, one_var ^ 1)
23471 var = convert_modes (HImode, QImode, var, true);
23472 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23473 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23474 x = GEN_INT (INTVAL (x) & 0xff);
23478 var = convert_modes (HImode, QImode, var, true);
23479 x = gen_int_mode (INTVAL (x) << 8, HImode);
23481 if (x != const0_rtx)
23482 var = expand_simple_binop (HImode, IOR, var, x, var,
23483 1, OPTAB_LIB_WIDEN);
/* Do the set in the wider (HImode-element) vector, then cast back. */
23485 x = gen_reg_rtx (wmode);
23486 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23487 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23489 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the constant image, then overwrite the one slot. */
23496 emit_move_insn (target, const_vec);
23497 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23501 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23502 all values variable, and none identical. */
23505 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23506 rtx target, rtx vals)
23508 enum machine_mode half_mode = GET_MODE_INNER (mode);
23509 rtx op0 = NULL, op1 = NULL;
23510 bool use_vec_concat = false;
23516 if (!mmx_ok && !TARGET_SSE)
23522 /* For the two element vectors, we always implement VEC_CONCAT. */
23523 op0 = XVECEXP (vals, 0, 0);
23524 op1 = XVECEXP (vals, 0, 1);
23525 use_vec_concat = true;
23529 half_mode = V2SFmode;
23532 half_mode = V2SImode;
23538 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23539 Recurse to load the two halves. */
23541 op0 = gen_reg_rtx (half_mode);
23542 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23543 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23545 op1 = gen_reg_rtx (half_mode);
23546 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23547 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23549 use_vec_concat = true;
23560 gcc_unreachable ();
23563 if (use_vec_concat)
23565 if (!register_operand (op0, half_mode))
23566 op0 = force_reg (half_mode, op0);
23567 if (!register_operand (op1, half_mode))
23568 op1 = force_reg (half_mode, op1);
23570 emit_insn (gen_rtx_SET (VOIDmode, target,
23571 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* Fallback: pack the elements into word_mode integers with shift/ior,
   then assemble TARGET from the resulting words. */
23575 int i, j, n_elts, n_words, n_elt_per_word;
23576 enum machine_mode inner_mode;
23577 rtx words[4], shift;
23579 inner_mode = GET_MODE_INNER (mode);
23580 n_elts = GET_MODE_NUNITS (mode);
23581 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23582 n_elt_per_word = n_elts / n_words;
23583 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23585 for (i = 0; i < n_words; ++i)
23587 rtx word = NULL_RTX;
23589 for (j = 0; j < n_elt_per_word; ++j)
/* Elements are visited from the high end of each word downward. */
23591 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23592 elt = convert_modes (word_mode, inner_mode, elt, true);
23598 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23599 word, 1, OPTAB_LIB_WIDEN);
23600 word = expand_simple_binop (word_mode, IOR, word, elt,
23601 word, 1, OPTAB_LIB_WIDEN);
23609 emit_move_insn (target, gen_lowpart (mode, words[0]));
23610 else if (n_words == 2)
/* CLOBBER tells the optimizers the partial stores fully define tmp. */
23612 rtx tmp = gen_reg_rtx (mode);
23613 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23614 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23615 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23616 emit_move_insn (target, tmp);
23618 else if (n_words == 4)
/* Recurse once with the four words as a V4SImode init. */
23620 rtx tmp = gen_reg_rtx (V4SImode);
23621 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23622 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23623 emit_move_insn (target, gen_lowpart (mode, tmp));
23626 gcc_unreachable ();
23630 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23631 instructions unless MMX_OK is true. */
23634 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23636 enum machine_mode mode = GET_MODE (target);
23637 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23638 int n_elts = GET_MODE_NUNITS (mode);
23639 int n_var = 0, one_var = -1;
23640 bool all_same = true, all_const_zero = true;
/* Classify the elements: count non-constants (remembering the last one),
   and track whether all are zero / all identical. */
23644 for (i = 0; i < n_elts; ++i)
23646 x = XVECEXP (vals, 0, i);
23647 if (!(CONST_INT_P (x)
23648 || GET_CODE (x) == CONST_DOUBLE
23649 || GET_CODE (x) == CONST_FIXED))
23650 n_var++, one_var = i;
23651 else if (x != CONST0_RTX (inner_mode))
23652 all_const_zero = false;
23653 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23657 /* Constants are best loaded from the constant pool. */
23660 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23664 /* If all values are identical, broadcast the value. */
23666 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23667 XVECEXP (vals, 0, 0)))
23670 /* Values where only one field is non-constant are best loaded from
23671 the pool and overwritten via move later. */
23675 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23676 XVECEXP (vals, 0, one_var),
23680 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Everything else falls through to the fully general expander. */
23684 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, using MMX
   instructions only when MMX_OK is true.  Strategy is chosen per
   vector mode (switch labels elided in this listing). */
23688 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23690 enum machine_mode mode = GET_MODE (target);
23691 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23692 bool use_vec_merge = false;
/* Two-element case: extract the other element and re-concat with VAL. */
23701 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23702 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23704 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23706 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23707 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23713 use_vec_merge = TARGET_SSE4_1;
23721 /* For the two element vectors, we implement a VEC_CONCAT with
23722 the extraction of the other element. */
23724 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
23725 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
23728 op0 = val, op1 = tmp;
23730 op0 = tmp, op1 = val;
23732 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
23733 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23738 use_vec_merge = TARGET_SSE4_1;
23745 use_vec_merge = true;
/* V4SF without SSE4.1: shuffle VAL into place via unpcklps/shufps. */
23749 /* tmp = target = A B C D */
23750 tmp = copy_to_reg (target);
23751 /* target = A A B B */
23752 emit_insn (gen_sse_unpcklps (target, target, target));
23753 /* target = X A B B */
23754 ix86_expand_vector_set (false, target, val, 0);
23755 /* target = A X C D */
23756 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23757 GEN_INT (1), GEN_INT (0),
23758 GEN_INT (2+4), GEN_INT (3+4)));
23762 /* tmp = target = A B C D */
23763 tmp = copy_to_reg (target);
23764 /* tmp = X B C D */
23765 ix86_expand_vector_set (false, tmp, val, 0);
23766 /* target = A B X D */
23767 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23768 GEN_INT (0), GEN_INT (1),
23769 GEN_INT (0+4), GEN_INT (3+4)));
23773 /* tmp = target = A B C D */
23774 tmp = copy_to_reg (target);
23775 /* tmp = X B C D */
23776 ix86_expand_vector_set (false, tmp, val, 0);
23777 /* target = A B X D */
23778 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23779 GEN_INT (0), GEN_INT (1),
23780 GEN_INT (2+4), GEN_INT (0+4)));
23784 gcc_unreachable ();
23789 use_vec_merge = TARGET_SSE4_1;
23793 /* Element 0 handled by vec_merge below. */
23796 use_vec_merge = true;
23802 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23803 store into element 0, then shuffle them back. */
23807 order[0] = GEN_INT (elt);
23808 order[1] = const1_rtx;
23809 order[2] = const2_rtx;
23810 order[3] = GEN_INT (3);
/* ORDER is the identity permutation with 0 and ELT swapped; applying
   it twice restores the original element order. */
23811 order[elt] = const0_rtx;
23813 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23814 order[1], order[2], order[3]));
23816 ix86_expand_vector_set (false, target, val, 0);
23818 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23819 order[1], order[2], order[3]));
23823 /* For SSE1, we have to reuse the V4SF code. */
23824 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
23825 gen_lowpart (SFmode, val), elt);
23830 use_vec_merge = TARGET_SSE2;
23833 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23837 use_vec_merge = TARGET_SSE4_1;
/* vec_merge path: broadcast VAL and merge it in at lane bit 1<<ELT. */
23847 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
23848 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
23849 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill TARGET to a stack temp, store VAL at the element's
   byte offset, and reload the whole vector. */
23853 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23855 emit_move_insn (mem, target);
23857 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23858 emit_move_insn (tmp, val);
23860 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET, using MMX
   instructions only when MMX_OK is true (switch labels elided here). */
23865 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
23867 enum machine_mode mode = GET_MODE (vec);
23868 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23869 bool use_vec_extr = false;
23882 use_vec_extr = true;
23886 use_vec_extr = TARGET_SSE4_1;
/* V4SF: broadcast the wanted element into a temp with shufps... */
23898 tmp = gen_reg_rtx (mode);
23899 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
23900 GEN_INT (elt), GEN_INT (elt),
23901 GEN_INT (elt+4), GEN_INT (elt+4)));
/* ...or move the high half down with unpckhps for high elements. */
23905 tmp = gen_reg_rtx (mode);
23906 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
23910 gcc_unreachable ();
23913 use_vec_extr = true;
23918 use_vec_extr = TARGET_SSE4_1;
/* V4SI: same idea using the SSE2 integer shuffles. */
23932 tmp = gen_reg_rtx (mode);
23933 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
23934 GEN_INT (elt), GEN_INT (elt),
23935 GEN_INT (elt), GEN_INT (elt)));
23939 tmp = gen_reg_rtx (mode);
23940 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
23944 gcc_unreachable ();
23947 use_vec_extr = true;
23952 /* For SSE1, we have to reuse the V4SF code. */
23953 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
23954 gen_lowpart (V4SFmode, vec), elt);
23960 use_vec_extr = TARGET_SSE2;
23963 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23967 use_vec_extr = TARGET_SSE4_1;
23971 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_select path: select the element directly. */
23978 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
23979 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
23981 /* Let the rtl optimizers know about the zero extension performed. */
23982 if (inner_mode == QImode || inner_mode == HImode)
23984 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
23985 target = gen_lowpart (SImode, target);
23988 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill VEC to the stack and load the element's bytes. */
23992 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23994 emit_move_insn (mem, vec);
23996 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23997 emit_move_insn (target, tmp);
24001 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
24002 pattern to reduce; DEST is the destination; IN is the input vector. */
24005 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24007 rtx tmp1, tmp2, tmp3;
24009 tmp1 = gen_reg_rtx (V4SFmode);
24010 tmp2 = gen_reg_rtx (V4SFmode);
24011 tmp3 = gen_reg_rtx (V4SFmode);
/* Fold the high half onto the low half (movhlps), apply FN... */
24013 emit_insn (gen_sse_movhlps (tmp1, in, in));
24014 emit_insn (fn (tmp2, tmp1, in));
/* ...then broadcast element 1 of the partial result and apply FN again,
   leaving the reduction in element 0 of DEST. */
24016 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24017 GEN_INT (1), GEN_INT (1),
24018 GEN_INT (1+4), GEN_INT (1+4)));
24019 emit_insn (fn (dest, tmp2, tmp3));
24022 /* Target hook for scalar_mode_supported_p. */
24024 ix86_scalar_mode_supported_p (enum machine_mode mode)
24026 if (DECIMAL_FLOAT_MODE_P (mode))
/* TFmode (__float128) is available only on 64-bit targets. */
24028 else if (mode == TFmode)
24029 return TARGET_64BIT;
/* Everything else defers to the generic hook. */
24031 return default_scalar_mode_supported_p (mode);
24034 /* Implements target hook vector_mode_supported_p. */
24036 ix86_vector_mode_supported_p (enum machine_mode mode)
/* A vector mode is supported if any enabled ISA extension has register
   support for it; checks go from SSE down to 3DNow!. */
24038 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24040 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24042 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24044 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24049 /* Target hook for c_mode_for_suffix. */
24050 static enum machine_mode
24051 ix86_c_mode_for_suffix (char suffix)
/* 'q' is honored only on 64-bit targets, 'w' only with MMX enabled.
   NOTE(review): the returned modes are elided from this listing
   (upstream uses TFmode for 'q' and XFmode for 'w') -- confirm. */
24053 if (TARGET_64BIT && suffix == 'q')
24055 if (TARGET_MMX && suffix == 'w')
24061 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24063 We do this in the new i386 backend to maintain source compatibility
24064 with the old cc0-based compiler. */
24067 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24068 tree inputs ATTRIBUTE_UNUSED,
/* Every asm implicitly clobbers the flags and the x87 status word. */
24071 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24073 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24078 /* Implements target vector targetm.asm.encode_section_info. This
24079 is not used by netware. */
24081 static void ATTRIBUTE_UNUSED
24082 ix86_encode_section_info (tree decl, rtx rtl, int first)
24084 default_encode_section_info (decl, rtl, first);
/* Static/external variables living in the large data area need far
   (64-bit) addressing; mark their symbol_refs accordingly. */
24086 if (TREE_CODE (decl) == VAR_DECL
24087 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24088 && ix86_in_large_data_p (decl))
24089 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24092 /* Worker function for REVERSE_CONDITION. */
24095 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
/* Floating-point CC modes must use the unordered-aware reversal so NaN
   comparisons stay correct. */
24097 return (mode != CCFPmode && mode != CCFPUmode
24098 ? reverse_condition (code)
24099 : reverse_condition_maybe_unordered (code));
24102 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24106 output_387_reg_move (rtx insn, rtx *operands)
24108 if (REG_P (operands[0]))
/* Reg->reg: if the source register dies here we can use a popping
   store (fstp) instead of a copy. */
24110 if (REG_P (operands[1])
24111 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24113 if (REGNO (operands[0]) == FIRST_STACK_REG)
24114 return output_387_ffreep (operands, 0);
24115 return "fstp\t%y0";
24117 if (STACK_TOP_P (operands[0]))
24118 return "fld%z1\t%y1";
24121 else if (MEM_P (operands[0]))
24123 gcc_assert (REG_P (operands[1]));
/* Reg->mem: pop if the source dies, otherwise keep it on the stack. */
24124 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24125 return "fstp%z0\t%y0";
24128 /* There is no non-popping store to memory for XFmode.
24129 So if we need one, follow the store with a load. */
24130 if (GET_MODE (operands[0]) == XFmode)
24131 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24133 return "fst%z0\t%y0";
24140 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24141 FP status register is set. */
24144 ix86_emit_fp_unordered_jump (rtx label)
24146 rtx reg = gen_reg_rtx (HImode)
/* Fetch the x87 status word into REG. */
24149 emit_insn (gen_x86_fnstsw_1 (reg));
/* Fast path: sahf transfers the status flags, then test UNORDERED. */
24151 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24153 emit_insn (gen_x86_sahf_1 (reg));
24155 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24156 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Otherwise test the C2 bit (0x04) of the status word directly. */
24160 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24162 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24163 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24166 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24167 gen_rtx_LABEL_REF (VOIDmode, label),
24169 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24171 emit_jump_insn (temp);
/* Mark the branch as unlikely (10%). */
24172 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24175 /* Output code to perform a log1p XFmode calculation. */
24177 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24179 rtx label1 = gen_label_rtx ();
24180 rtx label2 = gen_label_rtx ();
24182 rtx tmp = gen_reg_rtx (XFmode);
24183 rtx tmp2 = gen_reg_rtx (XFmode);
/* Branch on |op1| vs 0.2928... (= 1 - sqrt(2)/2, the fyl2xp1 input
   range bound -- see x87 FYL2XP1 documentation). */
24185 emit_insn (gen_absxf2 (tmp, op1));
24186 emit_insn (gen_cmpxf (tmp,
24187 CONST_DOUBLE_FROM_REAL_VALUE (
24188 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24190 emit_jump_insn (gen_bge (label1));
/* Small |op1|: op0 = ln(2) * log2(op1 + 1) via fyl2xp1. */
24192 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24193 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24194 emit_jump (label2);
/* Large |op1|: op0 = ln(2) * log2(1 + op1) via plain fyl2x. */
24196 emit_label (label1);
24197 emit_move_insn (tmp, CONST1_RTX (XFmode));
24198 emit_insn (gen_addxf3 (tmp, op1, tmp));
24199 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24200 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24202 emit_label (label2);
24205 /* Output code to perform a Newton-Rhapson approximation of a single precision
24206 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24208 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24210 rtx x0, x1, e0, e1, two;
24212 x0 = gen_reg_rtx (mode);
24213 e0 = gen_reg_rtx (mode);
24214 e1 = gen_reg_rtx (mode);
24215 x1 = gen_reg_rtx (mode);
/* The constant 2.0, broadcast when MODE is a vector mode. */
24217 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24219 if (VECTOR_MODE_P (mode))
24220 two = ix86_build_const_vector (SFmode, true, two);
24222 two = force_reg (mode, two);
24224 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24226 /* x0 = rcp(b) estimate */
24227 emit_insn (gen_rtx_SET (VOIDmode, x0,
24228 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 */
24231 emit_insn (gen_rtx_SET (VOIDmode, e0,
24232 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
24234 emit_insn (gen_rtx_SET (VOIDmode, e1,
24235 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1 -- one Newton-Raphson refinement of the estimate. */
24237 emit_insn (gen_rtx_SET (VOIDmode, x1,
24238 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
24240 emit_insn (gen_rtx_SET (VOIDmode, res,
24241 gen_rtx_MULT (mode, a, x1)));
24244 /* Output code to perform a Newton-Rhapson approximation of a
24245 single precision floating point [reciprocal] square root. */
24247 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24250 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24253 x0 = gen_reg_rtx (mode);
24254 e0 = gen_reg_rtx (mode);
24255 e1 = gen_reg_rtx (mode);
24256 e2 = gen_reg_rtx (mode);
24257 e3 = gen_reg_rtx (mode);
/* mthree = -3.0, mhalf = -0.5; broadcast for vector modes. */
24259 real_arithmetic (&r, NEGATE_EXPR, &dconst3, NULL);
24260 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24262 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24263 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24265 if (VECTOR_MODE_P (mode))
24267 mthree = ix86_build_const_vector (SFmode, true, mthree);
24268 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24271 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24272 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24274 /* x0 = rsqrt(a) estimate */
24275 emit_insn (gen_rtx_SET (VOIDmode, x0,
24276 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24279 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
24284 zero = gen_reg_rtx (mode);
24285 mask = gen_reg_rtx (mode);
/* mask is all-ones where a != 0; AND zeroes x0 for zero inputs. */
24287 zero = force_reg (mode, CONST0_RTX(mode));
24288 emit_insn (gen_rtx_SET (VOIDmode, mask,
24289 gen_rtx_NE (mode, zero, a)));
24291 emit_insn (gen_rtx_SET (VOIDmode, x0,
24292 gen_rtx_AND (mode, x0, mask)));
/* e0 = a * x0;  e1 = e0 * x0;  e2 = e1 - 3.0 */
24296 emit_insn (gen_rtx_SET (VOIDmode, e0,
24297 gen_rtx_MULT (mode, x0, a)));
24299 emit_insn (gen_rtx_SET (VOIDmode, e1,
24300 gen_rtx_MULT (mode, e0, x0)));
24303 mthree = force_reg (mode, mthree);
24304 emit_insn (gen_rtx_SET (VOIDmode, e2,
24305 gen_rtx_PLUS (mode, e1, mthree)));
24307 mhalf = force_reg (mode, mhalf);
/* NOTE(review): the two e3 assignments below are alternatives for the
   rsqrt vs. sqrt cases; the selecting conditional is elided from this
   listing -- confirm against the full source. */
24309 /* e3 = -.5 * x0 */
24310 emit_insn (gen_rtx_SET (VOIDmode, e3,
24311 gen_rtx_MULT (mode, x0, mhalf)));
24313 /* e3 = -.5 * e0 */
24314 emit_insn (gen_rtx_SET (VOIDmode, e3,
24315 gen_rtx_MULT (mode, e0, mhalf)));
24316 /* ret = e2 * e3 */
24317 emit_insn (gen_rtx_SET (VOIDmode, res,
24318 gen_rtx_MULT (mode, e2, e3)));
24321 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24323 static void ATTRIBUTE_UNUSED
24324 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24327 /* With Binutils 2.15, the "@unwind" marker must be specified on
24328 every occurrence of the ".eh_frame" section, not just the first
24331 && strcmp (name, ".eh_frame") == 0)
/* Emit the section directive with @unwind type; "aw" if writable. */
24333 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24334 flags & SECTION_WRITE ? "aw" : "a");
/* All other sections use the default ELF handling. */
24337 default_elf_asm_named_section (name, flags, decl);
24340 /* Return the mangling of TYPE if it is an extended fundamental type. */
24342 static const char *
24343 ix86_mangle_type (const_tree type)
24345 type = TYPE_MAIN_VARIANT (type);
/* Only void/bool/integer/real types can be extended fundamental types;
   anything else is left to the default mangling. */
24347 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24348 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24351 switch (TYPE_MODE (type))
24354 /* __float128 is "g". */
24357 /* "long double" or __float80 is "e". */
24364 /* For 32-bit code we can save PIC register setup by using
24365 __stack_chk_fail_local hidden function instead of calling
24366 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24367 register, so it is better to call __stack_chk_fail directly. */
24370 ix86_stack_protect_fail (void)
/* Returns the call tree used when a stack-protector check fails. */
24372 return TARGET_64BIT
24373 ? default_external_stack_protect_fail ()
24374 : default_hidden_stack_protect_fail ();
24377 /* Select a format to encode pointers in exception handling data. CODE
24378 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24379 true if the symbol may be affected by dynamic relocations.
24381 ??? All x86 object file formats are capable of representing this.
24382 After all, the relocation needed is the same as for the call insn.
24383 Whether or not a particular assembler allows us to enter such, I
24384 guess we'll have to see. */
24386 asm_preferred_eh_data_format (int code, int global)
/* PIC / 64-bit branch: pc-relative data, 4-byte when the code model
   guarantees small offsets, 8-byte otherwise; indirect for globals. */
24390 int type = DW_EH_PE_sdata8;
24392 || ix86_cmodel == CM_SMALL_PIC
24393 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24394 type = DW_EH_PE_sdata4;
24395 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: small code models can use 4-byte absolute data. */
24397 if (ix86_cmodel == CM_SMALL
24398 || (ix86_cmodel == CM_MEDIUM && code))
24399 return DW_EH_PE_udata4;
24400 return DW_EH_PE_absptr;
24403 /* Expand copysign from SIGN to the positive value ABS_VALUE
24404 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24407 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24409 enum machine_mode mode = GET_MODE (sign);
24410 rtx sgn = gen_reg_rtx (mode);
/* Build a sign-bit mask when the caller did not supply one. */
24411 if (mask == NULL_RTX)
24413 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24414 if (!VECTOR_MODE_P (mode))
24416 /* We need to generate a scalar mode mask in this case. */
24417 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24418 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24419 mask = gen_reg_rtx (mode);
24420 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* A caller-provided mask has the sign bit cleared, so invert it first;
   then sgn = SIGN's sign bit and result = abs_value | sgn. */
24424 mask = gen_rtx_NOT (mode, mask);
24425 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24426 gen_rtx_AND (mode, mask, sign)));
24427 emit_insn (gen_rtx_SET (VOIDmode, result,
24428 gen_rtx_IOR (mode, abs_value, sgn)));
24431 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24432 mask for masking out the sign-bit is stored in *SMASK, if that is
24435 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24437 enum machine_mode mode = GET_MODE (op0);
24440 xa = gen_reg_rtx (mode);
/* Mask with the sign bit cleared in every element. */
24441 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24442 if (!VECTOR_MODE_P (mode))
24444 /* We need to generate a scalar mode mask in this case. */
24445 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24446 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24447 mask = gen_reg_rtx (mode);
24448 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 with the sign bit stripped. */
24450 emit_insn (gen_rtx_SET (VOIDmode, xa,
24451 gen_rtx_AND (mode, op0, mask)));
24459 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24460 swapping the operands if SWAP_OPERANDS is true. The expanded
24461 code is a forward jump to a newly created label in case the
24462 comparison is true. The generated label rtx is returned. */
24464 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24465 bool swap_operands)
24476 label = gen_label_rtx ();
/* Compare in CCFPUmode (unordered-aware FP flags), then branch on
   CODE applied to the flags register. */
24477 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24478 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24479 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24480 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24481 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24482 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24483 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24484 JUMP_LABEL (tmp) = label;
24489 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24490 using comparison code CODE. Operands are swapped for the comparison if
24491 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24493 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24494 bool swap_operands)
24496 enum machine_mode mode = GET_MODE (op0);
24497 rtx mask = gen_reg_rtx (mode);
/* cmpsd for DFmode, cmpss for SFmode; both produce an all-ones /
   all-zeros mask in MASK. */
24506 if (mode == DFmode)
24507 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24508 gen_rtx_fmt_ee (code, mode, op0, op1)));
24510 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24511 gen_rtx_fmt_ee (code, mode, op0, op1)));
24516 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24517 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24519 ix86_gen_TWO52 (enum machine_mode mode)
24521 REAL_VALUE_TYPE TWO52r;
/* 2^52 for double, 2^23 for float: adding then subtracting this value
   rounds away the fractional bits. */
24524 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24525 TWO52 = const_double_from_real_value (TWO52r, mode);
24526 TWO52 = force_reg (mode, TWO52);
24531 /* Expand SSE sequence for computing lround from OP1 storing
24534 ix86_expand_lround (rtx op0, rtx op1)
24536 /* C code for the stuff we're doing below:
24537 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24540 enum machine_mode mode = GET_MODE (op1);
24541 const struct real_format *fmt;
24542 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24545 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2^(-p-1), the largest representable value < 0.5;
   using it avoids rounding x.5-eps upward. */
24546 fmt = REAL_MODE_FORMAT (mode);
24547 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24548 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24550 /* adj = copysign (0.5, op1) */
24551 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24552 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24554 /* adj = op1 + adj */
24555 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24557 /* op0 = (imode)adj */
24558 expand_fix (op0, adj, 0);
24561 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24564 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24566 /* C code for the stuff we're doing below (for do_floor):
24568 xi -= (double)xi > op1 ? 1 : 0;
24571 enum machine_mode fmode = GET_MODE (op1);
24572 enum machine_mode imode = GET_MODE (op0);
24573 rtx ireg, freg, label, tmp;
24575 /* reg = (long)op1 */
24576 ireg = gen_reg_rtx (imode);
24577 expand_fix (ireg, op1, 0);
24579 /* freg = (double)reg */
24580 freg = gen_reg_rtx (fmode);
24581 expand_float (freg, ireg, 0);
24583 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* The truncating fix rounds toward zero; correct by one in the floor
   (subtract) or ceil (add) direction when it rounded the wrong way. */
24584 label = ix86_expand_sse_compare_and_jump (UNLE,
24585 freg, op1, !do_floor);
24586 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24587 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24588 emit_move_insn (ireg, tmp);
24590 emit_label (label);
24591 LABEL_NUSES (label) = 1;
24593 emit_move_insn (op0, ireg);
24596 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24597 result in OPERAND0. */
24599 ix86_expand_rint (rtx operand0, rtx operand1)
24601 /* C code for the stuff we're doing below:
24602 xa = fabs (operand1);
24603 if (!isless (xa, 2**52))
24605 xa = xa + 2**52 - 2**52;
24606 return copysign (xa, operand1);
24608 enum machine_mode mode = GET_MODE (operand0);
24609 rtx res, xa, label, TWO52, mask;
24611 res = gen_reg_rtx (mode);
24612 emit_move_insn (res, operand1);
24614 /* xa = abs (operand1) */
24615 xa = ix86_expand_sse_fabs (res, &mask);
24617 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2^52 (or NaN) are already integral; skip the rounding. */
24618 TWO52 = ix86_gen_TWO52 (mode);
24619 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* xa + 2^52 - 2^52 rounds to nearest in the current rounding mode. */
24621 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24622 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (preserves -0.0). */
24624 ix86_sse_copysign_to_positive (res, xa, res, mask);
24626 emit_label (label);
24627 LABEL_NUSES (label) = 1;
24629 emit_move_insn (operand0, res);
24632 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24635 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24637 /* C code for the stuff we expand below.
24638 double xa = fabs (x), x2;
24639 if (!isless (xa, TWO52))
24641 xa = xa + TWO52 - TWO52;
24642 x2 = copysign (xa, x);
24651 enum machine_mode mode = GET_MODE (operand0);
24652 rtx xa, TWO52, tmp, label, one, res, mask;
24654 TWO52 = ix86_gen_TWO52 (mode);
24656 /* Temporary for holding the result, initialized to the input
24657 operand to ease control flow. */
24658 res = gen_reg_rtx (mode);
24659 emit_move_insn (res, operand1);
24661 /* xa = abs (operand1) */
24662 xa = ix86_expand_sse_fabs (res, &mask);
24664 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2^52 (or NaN) are already integral; skip the adjustment. */
24665 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24667 /* xa = xa + TWO52 - TWO52; */
24668 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24669 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24671 /* xa = copysign (xa, operand1) */
24672 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24674 /* generate 1.0 or -1.0 */
24675 one = force_reg (mode,
24676 const_double_from_real_value (do_floor
24677 ? dconst1 : dconstm1, mode));
24679 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* The compare mask selects 1.0 (floor) or -1.0 (ceil) per element. */
24680 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24681 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24682 gen_rtx_AND (mode, one, tmp)));
24683 /* We always need to subtract here to preserve signed zero. */
24684 tmp = expand_simple_binop (mode, MINUS,
24685 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24686 emit_move_insn (res, tmp);
24688 emit_label (label);
24689 LABEL_NUSES (label) = 1;
24691 emit_move_insn (operand0, res);
24694 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* (result into OPERAND0.  Unlike the *df_32 variant above, this one
   rounds via an integer round trip: fix to DImode/SImode and float
   back, then compensates by +/-1 in the appropriate direction.)  */
24697 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24699 /* C code for the stuff we expand below.
24700 double xa = fabs (x), x2;
24701 if (!isless (xa, TWO52))
24703 x2 = (double)(long)x;
24710 if (HONOR_SIGNED_ZEROS (mode))
24711 return copysign (x2, x);
24714 enum machine_mode mode = GET_MODE (operand0);
24715 rtx xa, xi, TWO52, tmp, label, one, res, mask;
24717 TWO52 = ix86_gen_TWO52 (mode);
24719 /* Temporary for holding the result, initialized to the input
24720 operand to ease control flow. */
24721 res = gen_reg_rtx (mode);
24722 emit_move_insn (res, operand1);
24724 /* xa = abs (operand1) */
24725 xa = ix86_expand_sse_fabs (res, &mask);
24727 /* if (!isless (xa, TWO52)) goto label; */
24728 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24730 /* xa = (double)(long)x */
/* DFmode needs a DImode fix (64-bit only path); SFmode fits SImode.  */
24731 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24732 expand_fix (xi, res, 0);
24733 expand_float (xa, xi, 0);
24736 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24738 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24739 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24740 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24741 gen_rtx_AND (mode, one, tmp)));
/* Truncation rounds toward zero, so floor subtracts the correction
   and ceil adds it.  */
24742 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
24743 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24744 emit_move_insn (res, tmp);
/* Copy the input's sign back so -0.0 is preserved when required.  */
24746 if (HONOR_SIGNED_ZEROS (mode))
24747 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24749 emit_label (label);
24750 LABEL_NUSES (label) = 1;
24752 emit_move_insn (operand0, res);
24755 /* Expand SSE sequence for computing round from OPERAND1 storing
24756 into OPERAND0. Sequence that works without relying on DImode truncation
24757 via cvttsd2siq that is only available on 64bit targets. */
24759 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
24761 /* C code for the stuff we expand below.
24762 double xa = fabs (x), xa2, x2;
24763 if (!isless (xa, TWO52))
24765 Using the absolute value and copying back sign makes
24766 -0.0 -> -0.0 correct.
24767 xa2 = xa + TWO52 - TWO52;
24772 else if (dxa > 0.5)
24774 x2 = copysign (xa2, x);
24777 enum machine_mode mode = GET_MODE (operand0);
24778 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
24780 TWO52 = ix86_gen_TWO52 (mode);
24782 /* Temporary for holding the result, initialized to the input
24783 operand to ease control flow. */
24784 res = gen_reg_rtx (mode);
24785 emit_move_insn (res, operand1);
24787 /* xa = abs (operand1) */
24788 xa = ix86_expand_sse_fabs (res, &mask);
24790 /* if (!isless (xa, TWO52)) goto label; */
24791 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24793 /* xa2 = xa + TWO52 - TWO52; */
/* XA2 is |x| rounded to integer in the current (nearest) mode.  */
24794 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24795 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
24797 /* dxa = xa2 - xa; */
/* DXA is the rounding error; its sign/magnitude tells us whether the
   nearest-rounding went the wrong way for round-half-away-from-zero.  */
24798 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
24800 /* generate 0.5, 1.0 and -0.5 */
24801 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
24802 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
24803 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): TMP is assigned a fresh register here and immediately
   overwritten by the compare-mask result on the next statement -- the
   gen_reg_rtx result appears dead.  Harmless but worth cleaning up.  */
24807 tmp = gen_reg_rtx (mode);
24808 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24809 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
24810 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24811 gen_rtx_AND (mode, one, tmp)));
24812 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24813 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
/* Implemented as -0.5 >= dxa (UNGE with swapped operands).  */
24814 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
24815 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24816 gen_rtx_AND (mode, one, tmp)));
24817 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24819 /* res = copysign (xa2, operand1) */
24820 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
24822 emit_label (label);
24823 LABEL_NUSES (label) = 1;
24825 emit_move_insn (operand0, res);
24828 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* (result into OPERAND0.  Truncates toward zero via an integer
   fix/float round trip; inputs with |x| >= 2**52 are already integral
   and fall through unchanged.)  */
24831 ix86_expand_trunc (rtx operand0, rtx operand1)
24833 /* C code for SSE variant we expand below.
24834 double xa = fabs (x), x2;
24835 if (!isless (xa, TWO52))
24837 x2 = (double)(long)x;
24838 if (HONOR_SIGNED_ZEROS (mode))
24839 return copysign (x2, x);
24842 enum machine_mode mode = GET_MODE (operand0);
24843 rtx xa, xi, TWO52, label, res, mask;
24845 TWO52 = ix86_gen_TWO52 (mode);
24847 /* Temporary for holding the result, initialized to the input
24848 operand to ease control flow. */
24849 res = gen_reg_rtx (mode);
24850 emit_move_insn (res, operand1);
24852 /* xa = abs (operand1) */
24853 xa = ix86_expand_sse_fabs (res, &mask);
24855 /* if (!isless (xa, TWO52)) goto label; */
24856 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24858 /* x = (double)(long)x */
/* cvttsd2si/cvttss2si truncate toward zero, which is exactly trunc.  */
24859 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24860 expand_fix (xi, res, 0);
24861 expand_float (res, xi, 0);
/* The round trip loses -0.0; restore the sign when it matters.  */
24863 if (HONOR_SIGNED_ZEROS (mode))
24864 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24866 emit_label (label);
24867 LABEL_NUSES (label) = 1;
24869 emit_move_insn (operand0, res);
24872 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* (result into OPERAND0.  32-bit-safe DFmode variant: avoids the
   DImode truncating conversion by rounding with the TWO52 trick and
   then compensating when the rounding overshot |x|.)  */
24875 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
24877 enum machine_mode mode = GET_MODE (operand0);
24878 rtx xa, mask, TWO52, label, one, res, smask, tmp;
24880 /* C code for SSE variant we expand below.
24881 double xa = fabs (x), x2;
24882 if (!isless (xa, TWO52))
24884 xa2 = xa + TWO52 - TWO52;
24888 x2 = copysign (xa2, x);
24892 TWO52 = ix86_gen_TWO52 (mode);
24894 /* Temporary for holding the result, initialized to the input
24895 operand to ease control flow. */
24896 res = gen_reg_rtx (mode);
24897 emit_move_insn (res, operand1);
24899 /* xa = abs (operand1) */
24900 xa = ix86_expand_sse_fabs (res, &smask);
24902 /* if (!isless (xa, TWO52)) goto label; */
24903 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24905 /* res = xa + TWO52 - TWO52; */
24906 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24907 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
24908 emit_move_insn (res, tmp);
24911 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24913 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* Round-to-nearest may have rounded up past |x|; subtract 1 in that
   case so the result truncates toward zero.  */
24914 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
24915 emit_insn (gen_rtx_SET (VOIDmode, mask,
24916 gen_rtx_AND (mode, mask, one)));
24917 tmp = expand_simple_binop (mode, MINUS,
24918 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
24919 emit_move_insn (res, tmp);
24921 /* res = copysign (res, operand1) */
24922 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
24924 emit_label (label);
24925 LABEL_NUSES (label) = 1;
24927 emit_move_insn (operand0, res);
24930 /* Expand SSE sequence for computing round from OPERAND1 storing
/* (result into OPERAND0.  Rounds half away from zero: adds
   nextafter(0.5, 0.0) -- NOT plain 0.5, which would round 0.5-ulp
   values the wrong way -- then truncates via fix/float.)  */
24933 ix86_expand_round (rtx operand0, rtx operand1)
24935 /* C code for the stuff we're doing below:
24936 double xa = fabs (x);
24937 if (!isless (xa, TWO52))
24939 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24940 return copysign (xa, x);
24942 enum machine_mode mode = GET_MODE (operand0);
24943 rtx res, TWO52, xa, label, xi, half, mask;
24944 const struct real_format *fmt;
24945 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24947 /* Temporary for holding the result, initialized to the input
24948 operand to ease control flow. */
24949 res = gen_reg_rtx (mode);
24950 emit_move_insn (res, operand1);
24952 TWO52 = ix86_gen_TWO52 (mode);
24953 xa = ix86_expand_sse_fabs (res, &mask);
24954 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24956 /* load nextafter (0.5, 0.0) */
/* Computed as 0.5 - 2**(-precision-1), the largest value below 0.5.  */
24957 fmt = REAL_MODE_FORMAT (mode);
24958 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24959 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24961 /* xa = xa + 0.5 */
24962 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
24963 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
24965 /* xa = (double)(int64_t)xa */
24966 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24967 expand_fix (xi, xa, 0);
24968 expand_float (xa, xi, 0);
24970 /* res = copysign (xa, operand1) */
24971 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
24973 emit_label (label);
24974 LABEL_NUSES (label) = 1;
24976 emit_move_insn (operand0, res);
24980 /* Validate whether a SSE5 instruction is valid or not.
24981 OPERANDS is the array of operands.
24982 NUM is the number of operands.
24983 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
24984 NUM_MEMORY is the maximum number of memory operands to accept. */
/* NOTE(review): several statements of this function (declarations of
   the loop index, mem_mask/mem_count, and some branch bodies) are
   elided in this extraction; comments below describe only the visible
   structure.  */
24986 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
24992 /* Count the number of memory arguments */
/* mem_mask presumably accumulates a bit per memory operand position;
   register operands contribute nothing.  */
24995 for (i = 0; i < num; i++)
24997 enum machine_mode mode = GET_MODE (operands[i]);
24998 if (register_operand (operands[i], mode))
25001 else if (memory_operand (operands[i], mode))
25003 mem_mask |= (1 << i);
25009 rtx pattern = PATTERN (insn);
25011 /* allow 0 for pcmov */
/* pcmov may take const0 as one arm of the IF_THEN_ELSE; anything else
   that is neither reg nor mem rejects the insn (elided else-branch).  */
25012 if (GET_CODE (pattern) != SET
25013 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25015 || operands[i] != CONST0_RTX (mode))
25020 /* If there were no memory operations, allow the insn */
25024 /* Do not allow the destination register to be a memory operand. */
25025 else if (mem_mask & (1 << 0))
25028 /* If there are too many memory operations, disallow the instruction. While
25029 the hardware only allows 1 memory reference, before register allocation
25030 for some insns, we allow two memory operations sometimes in order to allow
25031 code like the following to be optimized:
25033 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25035 or similar cases that are vectorized into using the fmaddss
25037 else if (mem_count > num_memory)
25040 /* Don't allow more than one memory operation if not optimizing. */
25041 else if (mem_count > 1 && !optimize)
25044 else if (num == 4 && mem_count == 1)
25046 /* formats (destination is the first argument), example fmaddss:
25047 xmm1, xmm1, xmm2, xmm3/mem
25048 xmm1, xmm1, xmm2/mem, xmm3
25049 xmm1, xmm2, xmm3/mem, xmm1
25050 xmm1, xmm2/mem, xmm3, xmm1 */
/* With OC0, the memory operand may sit in any source position 1-3.  */
25052 return ((mem_mask == (1 << 1))
25053 || (mem_mask == (1 << 2))
25054 || (mem_mask == (1 << 3)));
25056 /* format, example pmacsdd:
25057 xmm1, xmm2, xmm3/mem, xmm1 */
/* Without OC0, only operand 2 may be memory.  */
25059 return (mem_mask == (1 << 2));
25062 else if (num == 4 && num_memory == 2)
25064 /* If there are two memory operations, we can load one of the memory ops
25065 into the destination register. This is for optimizing the
25066 multiply/add ops, which the combiner has optimized both the multiply
25067 and the add insns to have a memory operation. We have to be careful
25068 that the destination doesn't overlap with the inputs. */
25069 rtx op0 = operands[0];
/* Reject if the destination overlaps an input (elided return).  */
25071 if (reg_mentioned_p (op0, operands[1])
25072 || reg_mentioned_p (op0, operands[2])
25073 || reg_mentioned_p (op0, operands[3]))
25076 /* formats (destination is the first argument), example fmaddss:
25077 xmm1, xmm1, xmm2, xmm3/mem
25078 xmm1, xmm1, xmm2/mem, xmm3
25079 xmm1, xmm2, xmm3/mem, xmm1
25080 xmm1, xmm2/mem, xmm3, xmm1
25082 For the oc0 case, we will load either operands[1] or operands[3] into
25083 operands[0], so any combination of 2 memory operands is ok. */
25087 /* format, example pmacsdd:
25088 xmm1, xmm2, xmm3/mem, xmm1
25090 For the integer multiply/add instructions be more restrictive and
25091 require operands[2] and operands[3] to be the memory operands. */
25093 return (mem_mask == ((1 << 2) | (1 << 3)));
25096 else if (num == 3 && num_memory == 1)
25098 /* formats, example protb:
25099 xmm1, xmm2, xmm3/mem
25100 xmm1, xmm2/mem, xmm3 */
25102 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25104 /* format, example comeq:
25105 xmm1, xmm2, xmm3/mem */
25107 return (mem_mask == (1 << 2));
/* Any operand-count/memory combination not matched above is a caller
   bug.  */
25111 gcc_unreachable ();
25117 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25118 hardware will allow by using the destination register to load one of the
25119 memory operations. Presently this is used by the multiply/add routines to
25120 allow 2 memory references. */
25123 ix86_expand_sse5_multiple_memory (rtx operands[],
25125 enum machine_mode mode)
25127 rtx op0 = operands[0];
/* Preconditions: the destination must be a register not mentioned by
   any input; violating them is a caller bug.  (First condition of this
   || chain is elided in this extraction.)  */
25129 || memory_operand (op0, mode)
25130 || reg_mentioned_p (op0, operands[1])
25131 || reg_mentioned_p (op0, operands[2])
25132 || reg_mentioned_p (op0, operands[3]))
25133 gcc_unreachable ();
25135 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25136 the destination register. */
/* After the move, the chosen operand slot presumably gets rewritten to
   op0 (elided statement) so only one memory reference remains.  */
25137 if (memory_operand (operands[1], mode))
25139 emit_move_insn (op0, operands[1]);
25142 else if (memory_operand (operands[3], mode))
25144 emit_move_insn (op0, operands[3]);
/* Neither candidate was a memory operand -- contradiction with the
   2-memory precondition.  */
25148 gcc_unreachable ();
25154 /* Table of valid machine attributes. */
/* NOTE(review): the matching #endif lines for the conditional entries
   below are elided in this extraction.  */
25155 static const struct attribute_spec ix86_attribute_table[] =
25157 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25158 /* Stdcall attribute says callee is responsible for popping arguments
25159 if they are not variable. */
25160 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25161 /* Fastcall attribute says callee is responsible for popping arguments
25162 if they are not variable. */
25163 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25164 /* Cdecl attribute says the callee is a normal C declaration */
25165 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25166 /* Regparm attribute specifies how many integer arguments are to be
25167 passed in registers. */
25168 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25169 /* Sseregparm attribute says we are using x86_64 calling conventions
25170 for FP arguments. */
25171 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25172 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* Name comes from a runtime string so -mstackrealign and the attribute
   share spelling.  */
25173 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25174 false, true, true, ix86_handle_cconv_attribute },
25175 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25176 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25177 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25178 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25180 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25181 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25182 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25183 SUBTARGET_ATTRIBUTE_TABLE,
/* Sentinel terminating the table.  */
25185 { NULL, 0, 0, false, false, false, NULL }
25188 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25190 x86_builtin_vectorization_cost (bool runtime_test)
25192 /* If the branch of the runtime test is taken - i.e. - the vectorized
25193 version is skipped - this incurs a misprediction cost (because the
25194 vectorized version is expected to be the fall-through). So we subtract
25195 the latency of a mispredicted branch from the costs that are incured
25196 when the vectorized version is executed.
25198 TODO: The values in individual target tables have to be tuned or new
25199 fields may be needed. For eg. on K8, the default branch path is the
25200 not-taken path. If the taken path is predicted correctly, the minimum
25201 penalty of going down the taken-path is 1 cycle. If the taken-path is
25202 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Negative: the cost model credits back the taken-branch penalty from
   the active tuning's cost table.  (The !runtime_test branch is elided
   in this extraction.)  */
25206 return (-(ix86_cost->cond_taken_branch_cost));
25212 /* Initialize the GCC target structure. */
/* Each #undef/#define pair below overrides one targetm hook with the
   i386 implementation; defaults come from target-def.h.  Matching
   #endif lines for the conditional sections are elided in this
   extraction.  */
25213 #undef TARGET_ATTRIBUTE_TABLE
25214 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25215 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25216 # undef TARGET_MERGE_DECL_ATTRIBUTES
25217 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25220 #undef TARGET_COMP_TYPE_ATTRIBUTES
25221 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin machinery (intrinsics etc.).  */
25223 #undef TARGET_INIT_BUILTINS
25224 #define TARGET_INIT_BUILTINS ix86_init_builtins
25225 #undef TARGET_EXPAND_BUILTIN
25226 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25228 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25229 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25230 ix86_builtin_vectorized_function
25232 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25233 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25235 #undef TARGET_BUILTIN_RECIPROCAL
25236 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
/* Assembler output hooks.  */
25238 #undef TARGET_ASM_FUNCTION_EPILOGUE
25239 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25241 #undef TARGET_ENCODE_SECTION_INFO
25242 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25243 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25245 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25248 #undef TARGET_ASM_OPEN_PAREN
25249 #define TARGET_ASM_OPEN_PAREN ""
25250 #undef TARGET_ASM_CLOSE_PAREN
25251 #define TARGET_ASM_CLOSE_PAREN ""
25253 #undef TARGET_ASM_ALIGNED_HI_OP
25254 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25255 #undef TARGET_ASM_ALIGNED_SI_OP
25256 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25258 #undef TARGET_ASM_ALIGNED_DI_OP
25259 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 has no alignment constraints on data directives, so unaligned
   output reuses the aligned ops.  */
25262 #undef TARGET_ASM_UNALIGNED_HI_OP
25263 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25264 #undef TARGET_ASM_UNALIGNED_SI_OP
25265 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25266 #undef TARGET_ASM_UNALIGNED_DI_OP
25267 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Scheduler hooks.  */
25269 #undef TARGET_SCHED_ADJUST_COST
25270 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25271 #undef TARGET_SCHED_ISSUE_RATE
25272 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25273 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25274 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25275 ia32_multipass_dfa_lookahead
25277 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25278 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25281 #undef TARGET_HAVE_TLS
25282 #define TARGET_HAVE_TLS true
25284 #undef TARGET_CANNOT_FORCE_CONST_MEM
25285 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25286 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25287 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25289 #undef TARGET_DELEGITIMIZE_ADDRESS
25290 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25292 #undef TARGET_MS_BITFIELD_LAYOUT_P
25293 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25296 #undef TARGET_BINDS_LOCAL_P
25297 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25299 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25300 #undef TARGET_BINDS_LOCAL_P
25301 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25304 #undef TARGET_ASM_OUTPUT_MI_THUNK
25305 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25306 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25307 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25309 #undef TARGET_ASM_FILE_START
25310 #define TARGET_ASM_FILE_START x86_file_start
25312 #undef TARGET_DEFAULT_TARGET_FLAGS
25313 #define TARGET_DEFAULT_TARGET_FLAGS \
25315 | TARGET_SUBTARGET_DEFAULT \
25316 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25318 #undef TARGET_HANDLE_OPTION
25319 #define TARGET_HANDLE_OPTION ix86_handle_option
/* Cost model hooks.  */
25321 #undef TARGET_RTX_COSTS
25322 #define TARGET_RTX_COSTS ix86_rtx_costs
25323 #undef TARGET_ADDRESS_COST
25324 #define TARGET_ADDRESS_COST ix86_address_cost
25326 #undef TARGET_FIXED_CONDITION_CODE_REGS
25327 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25328 #undef TARGET_CC_MODES_COMPATIBLE
25329 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25331 #undef TARGET_MACHINE_DEPENDENT_REORG
25332 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
/* Varargs / calling-convention hooks.  */
25334 #undef TARGET_BUILD_BUILTIN_VA_LIST
25335 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25337 #undef TARGET_EXPAND_BUILTIN_VA_START
25338 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25340 #undef TARGET_MD_ASM_CLOBBERS
25341 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25343 #undef TARGET_PROMOTE_PROTOTYPES
25344 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25345 #undef TARGET_STRUCT_VALUE_RTX
25346 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25347 #undef TARGET_SETUP_INCOMING_VARARGS
25348 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25349 #undef TARGET_MUST_PASS_IN_STACK
25350 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25351 #undef TARGET_PASS_BY_REFERENCE
25352 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25353 #undef TARGET_INTERNAL_ARG_POINTER
25354 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25355 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25356 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25357 #undef TARGET_STRICT_ARGUMENT_NAMING
25358 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25360 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25361 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25363 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25364 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25366 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25367 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25369 #undef TARGET_C_MODE_FOR_SUFFIX
25370 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25373 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25374 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25377 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25378 #undef TARGET_INSERT_ATTRIBUTES
25379 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25382 #undef TARGET_MANGLE_TYPE
25383 #define TARGET_MANGLE_TYPE ix86_mangle_type
25385 #undef TARGET_STACK_PROTECT_FAIL
25386 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25388 #undef TARGET_FUNCTION_VALUE
25389 #define TARGET_FUNCTION_VALUE ix86_function_value
25391 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25392 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Instantiate the target hook vector with all overrides above.  */
25394 struct gcc_target targetm = TARGET_INITIALIZER;
25396 #include "gt-i386.h"