1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #include "tree-gimple.h"
52 #include "tm-constrs.h"
55 static int x86_builtin_vectorization_cost (bool);
57 #ifndef CHECK_STACK_LIMIT
58 #define CHECK_STACK_LIMIT (-1)
61 /* Return index of given mode in mult and division cost tables. */
62 #define MODE_INDEX(mode) \
63 ((mode) == QImode ? 0 \
64 : (mode) == HImode ? 1 \
65 : (mode) == SImode ? 2 \
66 : (mode) == DImode ? 3 \
69 /* Processor costs (relative to an add) */
70 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
71 #define COSTS_N_BYTES(N) ((N) * 2)
73 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
76 struct processor_costs size_cost = { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
130 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
145 /* Processor costs (relative to an add) */
147 struct processor_costs i386_cost = { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
199 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
200 DUMMY_STRINGOP_ALGS},
201 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
202 DUMMY_STRINGOP_ALGS},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
217 struct processor_costs i486_cost = { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
271 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
272 DUMMY_STRINGOP_ALGS},
273 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
274 DUMMY_STRINGOP_ALGS},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
289 struct processor_costs pentium_cost = {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
341 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
342 DUMMY_STRINGOP_ALGS},
343 {{libcall, {{-1, rep_prefix_4_byte}}},
344 DUMMY_STRINGOP_ALGS},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
359 struct processor_costs pentiumpro_cost = {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
412 the alignment). For small blocks inline loop is still a noticeable win, for bigger
413 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
414 more expensive startup time in CPU, but after 4K the difference is down in the noise.
416 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
417 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
418 DUMMY_STRINGOP_ALGS},
419 {{rep_prefix_4_byte, {{1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
436 struct processor_costs geode_cost = {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
489 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
490 DUMMY_STRINGOP_ALGS},
491 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
492 DUMMY_STRINGOP_ALGS},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
507 struct processor_costs k6_cost = {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
562 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
563 DUMMY_STRINGOP_ALGS},
564 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
565 DUMMY_STRINGOP_ALGS},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
580 struct processor_costs athlon_cost = {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632 /* For some reason, Athlon deals better with REP prefix (relative to loops)
633 compared to K8. Alignment becomes important after 8 bytes for memcpy and
634 128 bytes for memset. */
635 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
636 DUMMY_STRINGOP_ALGS},
637 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
653 struct processor_costs k8_cost = {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 /* New AMD processors never drop prefetches; if they cannot be performed
698 immediately, they are queued. We set number of simultaneous prefetches
699 to a large constant to reflect this (it probably is not a good idea not
700 to limit number of prefetches at all, as their execution also takes some
702 100, /* number of parallel prefetches */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 /* K8 has optimized REP instruction for medium sized blocks, but for very small
711 blocks it is better to use loop. For large blocks, libcall can do
712 nontemporary accesses and beat inline considerably. */
713 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
714 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
715 {{libcall, {{8, loop}, {24, unrolled_loop},
716 {2048, rep_prefix_4_byte}, {-1, libcall}}},
717 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
731 struct processor_costs amdfam10_cost = {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
773 MOVD reg64, xmmreg Double FSTORE 4
774 MOVD reg32, xmmreg Double FSTORE 4
776 MOVD reg64, xmmreg Double FADD 3
778 MOVD reg32, xmmreg Double FADD 3
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set number of simultaneous prefetches
785 to a large constant to reflect this (it probably is not a good idea not
786 to limit number of prefetches at all, as their execution also takes some
788 100, /* number of parallel prefetches */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
797 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
798 very small blocks it is better to use loop. For large blocks, libcall can
799 do nontemporary accesses and beat inline considerably. */
800 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
801 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
802 {{libcall, {{8, loop}, {24, unrolled_loop},
803 {2048, rep_prefix_4_byte}, {-1, libcall}}},
804 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
819 struct processor_costs pentium4_cost = {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
872 DUMMY_STRINGOP_ALGS},
873 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
875 DUMMY_STRINGOP_ALGS},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
890 struct processor_costs nocona_cost = {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
943 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
944 {100000, unrolled_loop}, {-1, libcall}}}},
945 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
947 {libcall, {{24, loop}, {64, unrolled_loop},
948 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
963 struct processor_costs core2_cost = {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991 {4, 4, 4}, /* cost of loading integer registers */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1015 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1016 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1017 {{libcall, {{8, loop}, {15, unrolled_loop},
1018 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1019 {libcall, {{24, loop}, {32, unrolled_loop},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1036 struct processor_costs generic64_cost = {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration lea is 2 cycles and more. With
1039 this cost however our current implementation of synth_mult results in
1040 use of unnecessary temporary registers causing regression on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1086 is increased to perhaps more appropriate value of 5. */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS,
1095 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1096 {DUMMY_STRINGOP_ALGS,
1097 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1113 struct processor_costs generic32_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1166 DUMMY_STRINGOP_ALGS},
1167 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1168 DUMMY_STRINGOP_ALGS},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
1182 const struct processor_costs *ix86_cost = &pentium_cost;
1184 /* Processor feature/optimization bitmasks. */
1185 #define m_386 (1<<PROCESSOR_I386)
1186 #define m_486 (1<<PROCESSOR_I486)
1187 #define m_PENT (1<<PROCESSOR_PENTIUM)
1188 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1189 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1190 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1191 #define m_CORE2 (1<<PROCESSOR_CORE2)
1193 #define m_GEODE (1<<PROCESSOR_GEODE)
1194 #define m_K6 (1<<PROCESSOR_K6)
1195 #define m_K6_GEODE (m_K6 | m_GEODE)
1196 #define m_K8 (1<<PROCESSOR_K8)
1197 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1198 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1199 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1200 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1202 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1203 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1205 /* Generic instruction choice should be common subset of supported CPUs
1206 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1207 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1209 /* Feature tests against the various tunings. */
1210 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1211 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212 negatively, so enabling for Generic64 seems like good code size
1213 tradeoff. We can't enable it for 32bit generic because it does not
1214 work well with PPro base chips. */
1215 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1219 | m_NOCONA | m_CORE2 | m_GENERIC,
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1224 /* X86_TUNE_USE_BIT_TEST */
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1233 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1234 on simulation result. But after P4 was made, no performance benefit
1235 was observed with branch hints. It also increases the code size.
1236 As a result, icc never generates branch hints. */
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1244 | m_NOCONA | m_CORE2 | m_GENERIC,
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1249 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1251 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252 register stalls on Generic32 compilation setting as well. However
1253 in current implementation the partial register stalls are not eliminated
1254 very well - they can be introduced via subregs synthesized by combine
1255 and can happen in caller/callee saving sequences. Because this option
1256 pays back little on PPro based chips and is in conflict with partial reg
1257 dependencies used by Athlon/P4 based chips, it is better to leave it off
1258 for generic32 for now. */
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2 | m_GENERIC,
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386 | m_486 | m_K6_GEODE,
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1270 /* X86_TUNE_USE_MOV0 */
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1285 /* X86_TUNE_READ_MODIFY */
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1290 | m_GENERIC /* | m_PENT4 ? */,
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT | m_486 | m_386),
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386 | m_PENT4 | m_NOCONA,
1298 /* X86_TUNE_QIMODE_MATH */
1301 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1303 might be considered for Generic32 if our scheme for avoiding partial
1304 stalls was more effective. */
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1318 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1325 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1330 | m_GENERIC | m_GEODE),
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1335 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336 conflict here in between PPro/Pentium4 based chips that thread 128bit
1337 SSE registers as single units versus K8 based chips that divide SSE
1338 registers to two 64bit halves. This knob promotes all store destinations
1339 to be 128bit to allow register renaming on 128bit SSE units, but usually
1340 results in one extra microop on 64bit SSE units. Experimental results
1341 shows that disabling this option on P4 brings over 20% SPECfp regression,
1342 while enabling it on K8 brings roughly 2.4% regression that can be partly
1343 masked by careful scheduling of moves. */
1344 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1349 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350 are resolved on SSE register parts instead of whole registers, so we may
1351 maintain just lower part of scalar values in proper format leaving the
1352 upper part undefined. */
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO | m_PENT4 | m_NOCONA,
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_SHIFT1 */
1373 /* X86_TUNE_USE_FFREEP */
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_AMD_MULTIPLE | m_GENERIC),
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_USE_BT */
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1407 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1408 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1412 vector path on AMD machines. */
1413 m_K8 | m_GENERIC64 | m_AMDFAM10,
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1417 m_K8 | m_GENERIC64 | m_AMDFAM10,
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1433 from integer to FP. */
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386 | m_486 | m_PENT | m_K6),
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1455 static const unsigned int x86_accumulate_outgoing_args
1456 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1458 static const unsigned int x86_arch_always_fancy_math_387
1459 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1460 | m_NOCONA | m_CORE2 | m_GENERIC;
1462 static enum stringop_alg stringop_alg = no_stringop;
1464 /* In case the average insn count for single function invocation is
1465 lower than this constant, emit fast (but longer) prologue and
1467 #define FAST_PROLOGUE_INSN_COUNT 20
1469 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1470 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1471 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1472 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1477 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1479 /* ax, dx, cx, bx */
1480 AREG, DREG, CREG, BREG,
1481 /* si, di, bp, sp */
1482 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1484 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1485 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1491 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1494 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1497 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1498 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499 /* SSE REX registers */
1500 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1504 /* The "default" register map used in 32bit mode. */
1506 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1517 static int const x86_64_int_parameter_registers[6] =
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1523 static int const x86_64_ms_abi_int_parameter_registers[4] =
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1529 static int const x86_64_int_return_registers[4] =
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8,9,10,11,12,13,14,15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1600 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1614 rtx ix86_compare_op0 = NULL_RTX;
1615 rtx ix86_compare_op1 = NULL_RTX;
1616 rtx ix86_compare_emitted = NULL_RTX;
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1621 /* Define the structure for the machine field in struct function. */
1623 struct stack_local_entry GTY(())
1625 unsigned short mode;
1628 struct stack_local_entry *next;
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1655 HOST_WIDE_INT frame;
1657 int outgoing_arguments_size;
1660 HOST_WIDE_INT to_allocate;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset;
1663 HOST_WIDE_INT hard_frame_pointer_offset;
1664 HOST_WIDE_INT stack_pointer_offset;
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov;
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel;
1674 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1676 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath;
1681 /* Which cpu are we scheduling for. */
1682 enum processor_type ix86_tune;
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch;
1687 /* true if sse prefetch instruction is not NOOP. */
1688 int x86_prefetch_sse;
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm;
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer;
1695 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary;
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost;
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1706 int ix86_section_threshold = 65536;
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix[16];
1710 int internal_label_prefix_len;
1712 /* Fence to use after loop using movnt. */
1715 /* Register class used for passing given 64bit part of the argument.
1716 These represent classes as documented by the PS ABI, with the exception
1717 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1718 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (upper half does contain padding). */
1722 enum x86_64_reg_class
1725 X86_64_INTEGER_CLASS,
1726 X86_64_INTEGERSI_CLASS,
1733 X86_64_COMPLEX_X87_CLASS,
1736 static const char * const x86_64_reg_class_name[] =
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
1742 #define MAX_CLASSES 4
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1746 static bool ext_80387_constants_init = 0;
1749 static struct machine_function * ix86_init_machine_status (void);
1750 static rtx ix86_function_value (const_tree, const_tree, bool);
1751 static int ix86_function_regparm (const_tree, const_tree);
1752 static void ix86_compute_frame_layout (struct ix86_frame *);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit;
1770 /* Define a set of ISAs which aren't available for a given ISA. MMX
1771 and SSE ISAs are handled separately. */
1773 #define OPTION_MASK_ISA_MMX_UNSET \
1774 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1775 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1777 #define OPTION_MASK_ISA_SSE_UNSET \
1778 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1779 #define OPTION_MASK_ISA_SSE2_UNSET \
1780 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1781 #define OPTION_MASK_ISA_SSE3_UNSET \
1782 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1783 #define OPTION_MASK_ISA_SSSE3_UNSET \
1784 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1785 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1786 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1787 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1789 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1790 as -msse4.1 -msse4.2. -mno-sse4 should the same as -mno-sse4.1. */
1791 #define OPTION_MASK_ISA_SSE4 \
1792 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1793 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1795 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1797 #define OPTION_MASK_ISA_SSE5_UNSET \
1798 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1800 /* Vectorization library interface and handlers. */
1801 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1802 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1804 /* Implement TARGET_HANDLE_OPTION. */
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1812 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1815 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1816 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1821 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1824 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1825 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1833 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1836 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1837 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1842 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1845 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1851 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1854 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1855 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1860 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1863 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1864 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1869 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1872 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1873 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1881 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1882 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1887 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1892 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1897 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1900 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1901 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1906 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
1909 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1910 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1919 /* Sometimes certain combinations of command options do not make
1920 sense on a particular target machine. You can define a macro
1921 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1922 defined, is executed once just after all the command options have
1925 Don't use this macro to turn on various extra optimizations for
1926 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1929 override_options (void)
1932 int ix86_tune_defaulted = 0;
1933 int ix86_arch_specified = 0;
1934 unsigned int ix86_arch_mask, ix86_tune_mask;
1936 /* Comes from final.c -- no real reason to change it. */
1937 #define MAX_CODE_ALIGN 16
1941 const struct processor_costs *cost; /* Processor costs */
1942 const int align_loop; /* Default alignments. */
1943 const int align_loop_max_skip;
1944 const int align_jump;
1945 const int align_jump_max_skip;
1946 const int align_func;
1948 const processor_target_table[PROCESSOR_max] =
1950 {&i386_cost, 4, 3, 4, 3, 4},
1951 {&i486_cost, 16, 15, 16, 15, 16},
1952 {&pentium_cost, 16, 7, 16, 7, 16},
1953 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1954 {&geode_cost, 0, 0, 0, 0, 0},
1955 {&k6_cost, 32, 7, 32, 7, 32},
1956 {&athlon_cost, 16, 7, 16, 7, 16},
1957 {&pentium4_cost, 0, 0, 0, 0, 0},
1958 {&k8_cost, 16, 7, 16, 7, 16},
1959 {&nocona_cost, 0, 0, 0, 0, 0},
1960 {&core2_cost, 16, 10, 16, 10, 16},
1961 {&generic32_cost, 16, 7, 16, 7, 16},
1962 {&generic64_cost, 16, 10, 16, 10, 16},
1963 {&amdfam10_cost, 32, 24, 32, 7, 32}
1966 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1997 PTA_PREFETCH_SSE = 1 << 4,
1999 PTA_3DNOW_A = 1 << 6,
2003 PTA_POPCNT = 1 << 10,
2005 PTA_SSE4A = 1 << 12,
2006 PTA_NO_SAHF = 1 << 13,
2007 PTA_SSE4_1 = 1 << 14,
2008 PTA_SSE4_2 = 1 << 15,
2014 const char *const name; /* processor name or nickname. */
2015 const enum processor_type processor;
2016 const unsigned /*enum pta_flags*/ flags;
2018 const processor_alias_table[] =
2020 {"i386", PROCESSOR_I386, 0},
2021 {"i486", PROCESSOR_I486, 0},
2022 {"i586", PROCESSOR_PENTIUM, 0},
2023 {"pentium", PROCESSOR_PENTIUM, 0},
2024 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2025 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2026 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2027 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2028 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2029 {"i686", PROCESSOR_PENTIUMPRO, 0},
2030 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2031 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2032 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2033 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2034 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2035 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2036 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2037 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2038 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2039 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2040 | PTA_CX16 | PTA_NO_SAHF)},
2041 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2042 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2045 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2046 |PTA_PREFETCH_SSE)},
2047 {"k6", PROCESSOR_K6, PTA_MMX},
2048 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2049 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2050 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2051 | PTA_PREFETCH_SSE)},
2052 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2053 | PTA_PREFETCH_SSE)},
2054 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2056 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2058 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2060 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2061 | PTA_MMX | PTA_SSE | PTA_SSE2
2063 {"k8", PROCESSOR_K8, (PTA_64BIT
2064 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2065 | PTA_SSE | PTA_SSE2
2067 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2068 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2069 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2071 {"opteron", PROCESSOR_K8, (PTA_64BIT
2072 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2073 | PTA_SSE | PTA_SSE2
2075 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2076 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2077 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2079 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2080 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2081 | PTA_SSE | PTA_SSE2
2083 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2084 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2085 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2087 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2088 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2089 | PTA_SSE | PTA_SSE2
2091 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2092 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2093 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2095 | PTA_CX16 | PTA_ABM)},
2096 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2097 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2098 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2100 | PTA_CX16 | PTA_ABM)},
2101 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2102 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2105 int const pta_size = ARRAY_SIZE (processor_alias_table);
2107 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2108 SUBTARGET_OVERRIDE_OPTIONS;
2111 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2112 SUBSUBTARGET_OVERRIDE_OPTIONS;
2115 /* -fPIC is the default for x86_64. */
2116 if (TARGET_MACHO && TARGET_64BIT)
2119 /* Set the default values for switches whose default depends on TARGET_64BIT
2120 in case they weren't overwritten by command line options. */
2123 /* Mach-O doesn't support omitting the frame pointer for now. */
2124 if (flag_omit_frame_pointer == 2)
2125 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2126 if (flag_asynchronous_unwind_tables == 2)
2127 flag_asynchronous_unwind_tables = 1;
2128 if (flag_pcc_struct_return == 2)
2129 flag_pcc_struct_return = 0;
2133 if (flag_omit_frame_pointer == 2)
2134 flag_omit_frame_pointer = 0;
2135 if (flag_asynchronous_unwind_tables == 2)
2136 flag_asynchronous_unwind_tables = 0;
2137 if (flag_pcc_struct_return == 2)
2138 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2141 /* Need to check -mtune=generic first. */
2142 if (ix86_tune_string)
2144 if (!strcmp (ix86_tune_string, "generic")
2145 || !strcmp (ix86_tune_string, "i686")
2146 /* As special support for cross compilers we read -mtune=native
2147 as -mtune=generic. With native compilers we won't see the
2148 -mtune=native, as it was changed by the driver. */
2149 || !strcmp (ix86_tune_string, "native"))
2152 ix86_tune_string = "generic64";
2154 ix86_tune_string = "generic32";
2156 else if (!strncmp (ix86_tune_string, "generic", 7))
2157 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2161 if (ix86_arch_string)
2162 ix86_tune_string = ix86_arch_string;
2163 if (!ix86_tune_string)
2165 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2166 ix86_tune_defaulted = 1;
2169 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2170 need to use a sensible tune option. */
2171 if (!strcmp (ix86_tune_string, "generic")
2172 || !strcmp (ix86_tune_string, "x86-64")
2173 || !strcmp (ix86_tune_string, "i686"))
2176 ix86_tune_string = "generic64";
2178 ix86_tune_string = "generic32";
2181 if (ix86_stringop_string)
2183 if (!strcmp (ix86_stringop_string, "rep_byte"))
2184 stringop_alg = rep_prefix_1_byte;
2185 else if (!strcmp (ix86_stringop_string, "libcall"))
2186 stringop_alg = libcall;
2187 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2188 stringop_alg = rep_prefix_4_byte;
2189 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2190 stringop_alg = rep_prefix_8_byte;
2191 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2192 stringop_alg = loop_1_byte;
2193 else if (!strcmp (ix86_stringop_string, "loop"))
2194 stringop_alg = loop;
2195 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2196 stringop_alg = unrolled_loop;
2198 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2200 if (!strcmp (ix86_tune_string, "x86-64"))
2201 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2202 "-mtune=generic instead as appropriate.");
2204 if (!ix86_arch_string)
2205 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2207 ix86_arch_specified = 1;
2209 if (!strcmp (ix86_arch_string, "generic"))
2210 error ("generic CPU can be used only for -mtune= switch");
2211 if (!strncmp (ix86_arch_string, "generic", 7))
2212 error ("bad value (%s) for -march= switch", ix86_arch_string);
2214 if (ix86_cmodel_string != 0)
2216 if (!strcmp (ix86_cmodel_string, "small"))
2217 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2218 else if (!strcmp (ix86_cmodel_string, "medium"))
2219 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2220 else if (!strcmp (ix86_cmodel_string, "large"))
2221 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2223 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2224 else if (!strcmp (ix86_cmodel_string, "32"))
2225 ix86_cmodel = CM_32;
2226 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2227 ix86_cmodel = CM_KERNEL;
2229 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2233 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2234 use of rip-relative addressing. This eliminates fixups that
2235 would otherwise be needed if this object is to be placed in a
2236 DLL, and is essentially just as efficient as direct addressing. */
2237 if (TARGET_64BIT_MS_ABI)
2238 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2239 else if (TARGET_64BIT)
2240 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2242 ix86_cmodel = CM_32;
2244 if (ix86_asm_string != 0)
2247 && !strcmp (ix86_asm_string, "intel"))
2248 ix86_asm_dialect = ASM_INTEL;
2249 else if (!strcmp (ix86_asm_string, "att"))
2250 ix86_asm_dialect = ASM_ATT;
2252 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2254 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2255 error ("code model %qs not supported in the %s bit mode",
2256 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2257 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2258 sorry ("%i-bit mode not compiled in",
2259 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2261 for (i = 0; i < pta_size; i++)
2262 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2264 ix86_arch = processor_alias_table[i].processor;
2265 /* Default cpu tuning to the architecture. */
2266 ix86_tune = ix86_arch;
2268 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2269 error ("CPU you selected does not support x86-64 "
2272 if (processor_alias_table[i].flags & PTA_MMX
2273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2274 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2275 if (processor_alias_table[i].flags & PTA_3DNOW
2276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2277 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2278 if (processor_alias_table[i].flags & PTA_3DNOW_A
2279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2280 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2281 if (processor_alias_table[i].flags & PTA_SSE
2282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2284 if (processor_alias_table[i].flags & PTA_SSE2
2285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2286 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2287 if (processor_alias_table[i].flags & PTA_SSE3
2288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2289 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2290 if (processor_alias_table[i].flags & PTA_SSSE3
2291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2292 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2293 if (processor_alias_table[i].flags & PTA_SSE4_1
2294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2295 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2296 if (processor_alias_table[i].flags & PTA_SSE4_2
2297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2298 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2299 if (processor_alias_table[i].flags & PTA_SSE4A
2300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2301 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2302 if (processor_alias_table[i].flags & PTA_SSE5
2303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2304 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2306 if (processor_alias_table[i].flags & PTA_ABM)
2308 if (processor_alias_table[i].flags & PTA_CX16)
2309 x86_cmpxchg16b = true;
2310 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2312 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2313 x86_prefetch_sse = true;
2314 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2321 error ("bad value (%s) for -march= switch", ix86_arch_string);
2323 ix86_arch_mask = 1u << ix86_arch;
2324 for (i = 0; i < X86_ARCH_LAST; ++i)
2325 ix86_arch_features[i] &= ix86_arch_mask;
2327 for (i = 0; i < pta_size; i++)
2328 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2330 ix86_tune = processor_alias_table[i].processor;
2331 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2333 if (ix86_tune_defaulted)
2335 ix86_tune_string = "x86-64";
2336 for (i = 0; i < pta_size; i++)
2337 if (! strcmp (ix86_tune_string,
2338 processor_alias_table[i].name))
2340 ix86_tune = processor_alias_table[i].processor;
2343 error ("CPU you selected does not support x86-64 "
2346 /* Intel CPUs have always interpreted SSE prefetch instructions as
2347 NOPs; so, we can enable SSE prefetch instructions even when
2348 -mtune (rather than -march) points us to a processor that has them.
2349 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2350 higher processors. */
2352 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2353 x86_prefetch_sse = true;
2357 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2359 ix86_tune_mask = 1u << ix86_tune;
2360 for (i = 0; i < X86_TUNE_LAST; ++i)
2361 ix86_tune_features[i] &= ix86_tune_mask;
2364 ix86_cost = &size_cost;
2366 ix86_cost = processor_target_table[ix86_tune].cost;
2368 /* Arrange to set up i386_stack_locals for all functions. */
2369 init_machine_status = ix86_init_machine_status;
2371 /* Validate -mregparm= value. */
2372 if (ix86_regparm_string)
2375 warning (0, "-mregparm is ignored in 64-bit mode");
2376 i = atoi (ix86_regparm_string);
2377 if (i < 0 || i > REGPARM_MAX)
2378 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2383 ix86_regparm = REGPARM_MAX;
2385 /* If the user has provided any of the -malign-* options,
2386 warn and use that value only if -falign-* is not set.
2387 Remove this code in GCC 3.2 or later. */
2388 if (ix86_align_loops_string)
2390 warning (0, "-malign-loops is obsolete, use -falign-loops");
2391 if (align_loops == 0)
2393 i = atoi (ix86_align_loops_string);
2394 if (i < 0 || i > MAX_CODE_ALIGN)
2395 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2397 align_loops = 1 << i;
2401 if (ix86_align_jumps_string)
2403 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2404 if (align_jumps == 0)
2406 i = atoi (ix86_align_jumps_string);
2407 if (i < 0 || i > MAX_CODE_ALIGN)
2408 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2410 align_jumps = 1 << i;
2414 if (ix86_align_funcs_string)
2416 warning (0, "-malign-functions is obsolete, use -falign-functions");
2417 if (align_functions == 0)
2419 i = atoi (ix86_align_funcs_string);
2420 if (i < 0 || i > MAX_CODE_ALIGN)
2421 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2423 align_functions = 1 << i;
2427 /* Default align_* from the processor table. */
2428 if (align_loops == 0)
2430 align_loops = processor_target_table[ix86_tune].align_loop;
2431 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2433 if (align_jumps == 0)
2435 align_jumps = processor_target_table[ix86_tune].align_jump;
2436 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2438 if (align_functions == 0)
2440 align_functions = processor_target_table[ix86_tune].align_func;
2443 /* Validate -mbranch-cost= value, or provide default. */
2444 ix86_branch_cost = ix86_cost->branch_cost;
2445 if (ix86_branch_cost_string)
2447 i = atoi (ix86_branch_cost_string);
2449 error ("-mbranch-cost=%d is not between 0 and 5", i);
2451 ix86_branch_cost = i;
2453 if (ix86_section_threshold_string)
2455 i = atoi (ix86_section_threshold_string);
2457 error ("-mlarge-data-threshold=%d is negative", i);
2459 ix86_section_threshold = i;
2462 if (ix86_tls_dialect_string)
2464 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2465 ix86_tls_dialect = TLS_DIALECT_GNU;
2466 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2467 ix86_tls_dialect = TLS_DIALECT_GNU2;
2468 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2469 ix86_tls_dialect = TLS_DIALECT_SUN;
2471 error ("bad value (%s) for -mtls-dialect= switch",
2472 ix86_tls_dialect_string);
2475 if (ix87_precision_string)
2477 i = atoi (ix87_precision_string);
2478 if (i != 32 && i != 64 && i != 80)
2479 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2484 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2486 /* Enable by default the SSE and MMX builtins. Do allow the user to
2487 explicitly disable any of these. In particular, disabling SSE and
2488 MMX for kernel code is extremely useful. */
2489 if (!ix86_arch_specified)
2491 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2492 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2495 warning (0, "-mrtd is ignored in 64bit mode");
2499 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2501 if (!ix86_arch_specified)
2503 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2505 /* i386 ABI does not specify red zone. It still makes sense to use it
2506 when programmer takes care to stack from being destroyed. */
2507 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2508 target_flags |= MASK_NO_RED_ZONE;
2511 /* Keep nonleaf frame pointers. */
2512 if (flag_omit_frame_pointer)
2513 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2514 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2515 flag_omit_frame_pointer = 1;
2517 /* If we're doing fast math, we don't care about comparison order
2518 wrt NaNs. This lets us use a shorter comparison sequence. */
2519 if (flag_finite_math_only)
2520 target_flags &= ~MASK_IEEE_FP;
2522 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2523 since the insns won't need emulation. */
2524 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2525 target_flags &= ~MASK_NO_FANCY_MATH_387;
2527 /* Likewise, if the target doesn't have a 387, or we've specified
2528 software floating point, don't use 387 inline intrinsics. */
2530 target_flags |= MASK_NO_FANCY_MATH_387;
2532 /* Turn on SSE4A bultins for -msse5. */
2534 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2536 /* Turn on SSE4.1 builtins for -msse4.2. */
2538 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2540 /* Turn on SSSE3 builtins for -msse4.1. */
2542 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2544 /* Turn on SSE3 builtins for -mssse3. */
2546 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2548 /* Turn on SSE3 builtins for -msse4a. */
2550 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2552 /* Turn on SSE2 builtins for -msse3. */
2554 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2556 /* Turn on SSE builtins for -msse2. */
2558 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2560 /* Turn on MMX builtins for -msse. */
2563 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2564 x86_prefetch_sse = true;
2567 /* Turn on MMX builtins for 3Dnow. */
2569 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2571 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2572 if (TARGET_SSE4_2 || TARGET_ABM)
2575 /* Validate -mpreferred-stack-boundary= value, or provide default.
2576 The default of 128 bits is for Pentium III's SSE __m128. We can't
2577 change it because of optimize_size. Otherwise, we can't mix object
2578 files compiled with -Os and -On. */
2579 ix86_preferred_stack_boundary = 128;
2580 if (ix86_preferred_stack_boundary_string)
2582 i = atoi (ix86_preferred_stack_boundary_string);
2583 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2584 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2585 TARGET_64BIT ? 4 : 2);
2587 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2590 /* Accept -msseregparm only if at least SSE support is enabled. */
2591 if (TARGET_SSEREGPARM
2593 error ("-msseregparm used without SSE enabled");
2595 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2596 if (ix86_fpmath_string != 0)
2598 if (! strcmp (ix86_fpmath_string, "387"))
2599 ix86_fpmath = FPMATH_387;
2600 else if (! strcmp (ix86_fpmath_string, "sse"))
2604 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2605 ix86_fpmath = FPMATH_387;
2608 ix86_fpmath = FPMATH_SSE;
2610 else if (! strcmp (ix86_fpmath_string, "387,sse")
2611 || ! strcmp (ix86_fpmath_string, "sse,387"))
2615 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2616 ix86_fpmath = FPMATH_387;
2618 else if (!TARGET_80387)
2620 warning (0, "387 instruction set disabled, using SSE arithmetics");
2621 ix86_fpmath = FPMATH_SSE;
2624 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2627 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2630 /* If the i387 is disabled, then do not return values in it. */
2632 target_flags &= ~MASK_FLOAT_RETURNS;
2634 /* Use external vectorized library in vectorizing intrinsics. */
2635 if (ix86_veclibabi_string)
2637 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2638 ix86_veclib_handler = ix86_veclibabi_acml;
2640 error ("unknown vectorization library ABI type (%s) for "
2641 "-mveclibabi= switch", ix86_veclibabi_string);
2644 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2645 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2647 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2649 /* ??? Unwind info is not correct around the CFG unless either a frame
2650 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2651 unwind info generation to be aware of the CFG and propagating states
2653 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2654 || flag_exceptions || flag_non_call_exceptions)
2655 && flag_omit_frame_pointer
2656 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2658 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2659 warning (0, "unwind tables currently require either a frame pointer "
2660 "or -maccumulate-outgoing-args for correctness");
2661 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2664 /* For sane SSE instruction set generation we need fcomi instruction.
2665 It is safe to enable all CMOVE instructions. */
2669 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2672 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2673 p = strchr (internal_label_prefix, 'X');
2674 internal_label_prefix_len = p - internal_label_prefix;
2678 /* When scheduling description is not available, disable scheduler pass
2679 so it won't slow down the compilation and make x87 code slower. */
2680 if (!TARGET_SCHEDULE)
2681 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2683 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2684 set_param_value ("simultaneous-prefetches",
2685 ix86_cost->simultaneous_prefetches);
2686 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2687 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2688 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2689 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2690 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2691 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2693 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2694 can be optimized to ap = __builtin_next_arg (0). */
2695 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2696 targetm.expand_builtin_va_start = NULL;
2699 /* Return true if this goes in large data/bss. */
2702 ix86_in_large_data_p (tree exp)
2704 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2707 /* Functions are never large data. */
2708 if (TREE_CODE (exp) == FUNCTION_DECL)
2711 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2713 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2714 if (strcmp (section, ".ldata") == 0
2715 || strcmp (section, ".lbss") == 0)
2721 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2723 /* If this is an incomplete type with size 0, then we can't put it
2724 in data because it might be too big when completed. */
2725 if (!size || size > ix86_section_threshold)
2732 /* Switch to the appropriate section for output of DECL.
2733 DECL is either a `VAR_DECL' node or a constant of some sort.
2734 RELOC indicates whether forming the initial value of DECL requires
2735 link-time relocations. */
2737 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2741 x86_64_elf_select_section (tree decl, int reloc,
2742 unsigned HOST_WIDE_INT align)
2744 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2745 && ix86_in_large_data_p (decl))
2747 const char *sname = NULL;
2748 unsigned int flags = SECTION_WRITE;
2749 switch (categorize_decl_for_section (decl, reloc))
2754 case SECCAT_DATA_REL:
2755 sname = ".ldata.rel";
2757 case SECCAT_DATA_REL_LOCAL:
2758 sname = ".ldata.rel.local";
2760 case SECCAT_DATA_REL_RO:
2761 sname = ".ldata.rel.ro";
2763 case SECCAT_DATA_REL_RO_LOCAL:
2764 sname = ".ldata.rel.ro.local";
2768 flags |= SECTION_BSS;
2771 case SECCAT_RODATA_MERGE_STR:
2772 case SECCAT_RODATA_MERGE_STR_INIT:
2773 case SECCAT_RODATA_MERGE_CONST:
2777 case SECCAT_SRODATA:
2784 /* We don't split these for medium model. Place them into
2785 default sections and hope for best. */
2790 /* We might get called with string constants, but get_named_section
2791 doesn't like them as they are not DECLs. Also, we need to set
2792 flags in that case. */
2794 return get_section (sname, flags, NULL);
2795 return get_named_section (decl, sname, reloc);
2798 return default_elf_select_section (decl, reloc, align);
2801 /* Build up a unique section name, expressed as a
2802 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2803 RELOC indicates whether the initial value of EXP requires
2804 link-time relocations. */
2806 static void ATTRIBUTE_UNUSED
2807 x86_64_elf_unique_section (tree decl, int reloc)
2809 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2810 && ix86_in_large_data_p (decl))
2812 const char *prefix = NULL;
2813 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2814 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2816 switch (categorize_decl_for_section (decl, reloc))
2819 case SECCAT_DATA_REL:
2820 case SECCAT_DATA_REL_LOCAL:
2821 case SECCAT_DATA_REL_RO:
2822 case SECCAT_DATA_REL_RO_LOCAL:
2823 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2826 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2829 case SECCAT_RODATA_MERGE_STR:
2830 case SECCAT_RODATA_MERGE_STR_INIT:
2831 case SECCAT_RODATA_MERGE_CONST:
2832 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2834 case SECCAT_SRODATA:
2841 /* We don't split these for medium model. Place them into
2842 default sections and hope for best. */
2850 plen = strlen (prefix);
2852 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2853 name = targetm.strip_name_encoding (name);
2854 nlen = strlen (name);
2856 string = (char *) alloca (nlen + plen + 1);
2857 memcpy (string, prefix, plen);
2858 memcpy (string + plen, name, nlen + 1);
2860 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2864 default_unique_section (decl, reloc);
2867 #ifdef COMMON_ASM_OP
2868 /* This says how to output assembler code to declare an
2869 uninitialized external linkage data object.
2871 For medium model x86-64 we need to use .largecomm opcode for
2874 x86_elf_aligned_common (FILE *file,
2875 const char *name, unsigned HOST_WIDE_INT size,
2878 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2879 && size > (unsigned int)ix86_section_threshold)
2880 fprintf (file, ".largecomm\t");
2882 fprintf (file, "%s", COMMON_ASM_OP);
2883 assemble_name (file, name);
2884 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2885 size, align / BITS_PER_UNIT);
2889 /* Utility function for targets to use in implementing
2890 ASM_OUTPUT_ALIGNED_BSS. */
2893 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2894 const char *name, unsigned HOST_WIDE_INT size,
2897 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2898 && size > (unsigned int)ix86_section_threshold)
2899 switch_to_section (get_named_section (decl, ".lbss", 0));
2901 switch_to_section (bss_section);
2902 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2903 #ifdef ASM_DECLARE_OBJECT_NAME
2904 last_assemble_variable_decl = decl;
2905 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2907 /* Standard thing is just output label for the object. */
2908 ASM_OUTPUT_LABEL (file, name);
2909 #endif /* ASM_DECLARE_OBJECT_NAME */
2910 ASM_OUTPUT_SKIP (file, size ? size : 1);
2914 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2916 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2917 make the problem with not enough registers even worse. */
2918 #ifdef INSN_SCHEDULING
2920 flag_schedule_insns = 0;
2924 /* The Darwin libraries never set errno, so we might as well
2925 avoid calling them when that's the only reason we would. */
2926 flag_errno_math = 0;
2928 /* The default values of these switches depend on the TARGET_64BIT
2929 that is not known at this moment. Mark these values with 2 and
2930 let user the to override these. In case there is no command line option
2931 specifying them, we will set the defaults in override_options. */
2933 flag_omit_frame_pointer = 2;
2934 flag_pcc_struct_return = 2;
2935 flag_asynchronous_unwind_tables = 2;
2936 flag_vect_cost_model = 1;
2937 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2938 SUBTARGET_OPTIMIZATION_OPTIONS;
2942 /* Decide whether we can make a sibling call to a function. DECL is the
2943 declaration of the function being targeted by the call and EXP is the
2944 CALL_EXPR representing the call. */
2947 ix86_function_ok_for_sibcall (tree decl, tree exp)
2952 /* If we are generating position-independent code, we cannot sibcall
2953 optimize any indirect call, or a direct call to a global function,
2954 as the PLT requires %ebx be live. */
2955 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2962 func = TREE_TYPE (CALL_EXPR_FN (exp));
2963 if (POINTER_TYPE_P (func))
2964 func = TREE_TYPE (func);
2967 /* Check that the return value locations are the same. Like
2968 if we are returning floats on the 80387 register stack, we cannot
2969 make a sibcall from a function that doesn't return a float to a
2970 function that does or, conversely, from a function that does return
2971 a float to a function that doesn't; the necessary stack adjustment
2972 would not be executed. This is also the place we notice
2973 differences in the return value ABI. Note that it is ok for one
2974 of the functions to have void return type as long as the return
2975 value of the other is passed in a register. */
2976 a = ix86_function_value (TREE_TYPE (exp), func, false);
2977 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2979 if (STACK_REG_P (a) || STACK_REG_P (b))
2981 if (!rtx_equal_p (a, b))
2984 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2986 else if (!rtx_equal_p (a, b))
2989 /* If this call is indirect, we'll need to be able to use a call-clobbered
2990 register for the address of the target function. Make sure that all
2991 such registers are not used for passing parameters. */
2992 if (!decl && !TARGET_64BIT)
2996 /* We're looking at the CALL_EXPR, we need the type of the function. */
2997 type = CALL_EXPR_FN (exp); /* pointer expression */
2998 type = TREE_TYPE (type); /* pointer type */
2999 type = TREE_TYPE (type); /* function type */
3001 if (ix86_function_regparm (type, NULL) >= 3)
3003 /* ??? Need to count the actual number of registers to be used,
3004 not the possible number of registers. Fix later. */
3009 /* Dllimport'd functions are also called indirectly. */
3010 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3011 && decl && DECL_DLLIMPORT_P (decl)
3012 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3015 /* If we forced aligned the stack, then sibcalling would unalign the
3016 stack, which may break the called function. */
3017 if (cfun->machine->force_align_arg_pointer)
3020 /* Otherwise okay. That also includes certain types of indirect calls. */
3024 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3025 calling convention attributes;
3026 arguments as in struct attribute_spec.handler. */
3029 ix86_handle_cconv_attribute (tree *node, tree name,
3031 int flags ATTRIBUTE_UNUSED,
3034 if (TREE_CODE (*node) != FUNCTION_TYPE
3035 && TREE_CODE (*node) != METHOD_TYPE
3036 && TREE_CODE (*node) != FIELD_DECL
3037 && TREE_CODE (*node) != TYPE_DECL)
3039 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3040 IDENTIFIER_POINTER (name));
3041 *no_add_attrs = true;
3045 /* Can combine regparm with all attributes but fastcall. */
3046 if (is_attribute_p ("regparm", name))
3050 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3052 error ("fastcall and regparm attributes are not compatible");
3055 cst = TREE_VALUE (args);
3056 if (TREE_CODE (cst) != INTEGER_CST)
3058 warning (OPT_Wattributes,
3059 "%qs attribute requires an integer constant argument",
3060 IDENTIFIER_POINTER (name));
3061 *no_add_attrs = true;
3063 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3065 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3066 IDENTIFIER_POINTER (name), REGPARM_MAX);
3067 *no_add_attrs = true;
3071 && lookup_attribute (ix86_force_align_arg_pointer_string,
3072 TYPE_ATTRIBUTES (*node))
3073 && compare_tree_int (cst, REGPARM_MAX-1))
3075 error ("%s functions limited to %d register parameters",
3076 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3084 /* Do not warn when emulating the MS ABI. */
3085 if (!TARGET_64BIT_MS_ABI)
3086 warning (OPT_Wattributes, "%qs attribute ignored",
3087 IDENTIFIER_POINTER (name));
3088 *no_add_attrs = true;
3092 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3093 if (is_attribute_p ("fastcall", name))
3095 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3097 error ("fastcall and cdecl attributes are not compatible");
3099 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3101 error ("fastcall and stdcall attributes are not compatible");
3103 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3105 error ("fastcall and regparm attributes are not compatible");
3109 /* Can combine stdcall with fastcall (redundant), regparm and
3111 else if (is_attribute_p ("stdcall", name))
3113 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3115 error ("stdcall and cdecl attributes are not compatible");
3117 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3119 error ("stdcall and fastcall attributes are not compatible");
3123 /* Can combine cdecl with regparm and sseregparm. */
3124 else if (is_attribute_p ("cdecl", name))
3126 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3128 error ("stdcall and cdecl attributes are not compatible");
3130 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3132 error ("fastcall and cdecl attributes are not compatible");
3136 /* Can combine sseregparm with all attributes. */
3141 /* Return 0 if the attributes for two types are incompatible, 1 if they
3142 are compatible, and 2 if they are nearly compatible (which causes a
3143 warning to be generated). */
3146 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3148 /* Check for mismatch of non-default calling convention. */
3149 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3151 if (TREE_CODE (type1) != FUNCTION_TYPE
3152 && TREE_CODE (type1) != METHOD_TYPE)
3155 /* Check for mismatched fastcall/regparm types. */
3156 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3157 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3158 || (ix86_function_regparm (type1, NULL)
3159 != ix86_function_regparm (type2, NULL)))
3162 /* Check for mismatched sseregparm types. */
3163 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3164 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3167 /* Check for mismatched return types (cdecl vs stdcall). */
3168 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3169 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3175 /* Return the regparm value for a function with the indicated TYPE and DECL.
3176 DECL may be NULL when calling function indirectly
3177 or considering a libcall. */
3180 ix86_function_regparm (const_tree type, const_tree decl)
3183 int regparm = ix86_regparm;
3188 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3190 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3192 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3195 /* Use register calling convention for local functions when possible. */
3196 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3197 && flag_unit_at_a_time && !profile_flag)
3199 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3200 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3203 int local_regparm, globals = 0, regno;
3206 /* Make sure no regparm register is taken by a
3207 fixed register variable. */
3208 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3209 if (fixed_regs[local_regparm])
3212 /* We can't use regparm(3) for nested functions as these use
3213 static chain pointer in third argument. */
3214 if (local_regparm == 3
3215 && (decl_function_context (decl)
3216 || ix86_force_align_arg_pointer)
3217 && !DECL_NO_STATIC_CHAIN (decl))
3220 /* If the function realigns its stackpointer, the prologue will
3221 clobber %ecx. If we've already generated code for the callee,
3222 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3223 scanning the attributes for the self-realigning property. */
3224 f = DECL_STRUCT_FUNCTION (decl);
3225 if (local_regparm == 3
3226 && (f ? !!f->machine->force_align_arg_pointer
3227 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3228 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3231 /* Each fixed register usage increases register pressure,
3232 so less registers should be used for argument passing.
3233 This functionality can be overriden by an explicit
3235 for (regno = 0; regno <= DI_REG; regno++)
3236 if (fixed_regs[regno])
3240 = globals < local_regparm ? local_regparm - globals : 0;
3242 if (local_regparm > regparm)
3243 regparm = local_regparm;
3250 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3251 DFmode (2) arguments in SSE registers for a function with the
3252 indicated TYPE and DECL. DECL may be NULL when calling function
3253 indirectly or considering a libcall. Otherwise return 0. */
3256 ix86_function_sseregparm (const_tree type, const_tree decl)
3258 gcc_assert (!TARGET_64BIT);
3260 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3261 by the sseregparm attribute. */
3262 if (TARGET_SSEREGPARM
3263 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3268 error ("Calling %qD with attribute sseregparm without "
3269 "SSE/SSE2 enabled", decl);
3271 error ("Calling %qT with attribute sseregparm without "
3272 "SSE/SSE2 enabled", type);
3279 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3280 (and DFmode for SSE2) arguments in SSE registers. */
3281 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3283 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3284 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3286 return TARGET_SSE2 ? 2 : 1;
3292 /* Return true if EAX is live at the start of the function. Used by
3293 ix86_expand_prologue to determine if we need special help before
3294 calling allocate_stack_worker. */
3297 ix86_eax_live_at_start_p (void)
3299 /* Cheat. Don't bother working forward from ix86_function_regparm
3300 to the function type to whether an actual argument is located in
3301 eax. Instead just look at cfg info, which is still close enough
3302 to correct at this point. This gives false positives for broken
3303 functions that might use uninitialized data that happens to be
3304 allocated in eax, but who cares? */
3305 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3308 /* Value is the number of bytes of arguments automatically
3309 popped when returning from a subroutine call.
3310 FUNDECL is the declaration node of the function (as a tree),
3311 FUNTYPE is the data type of the function (as a tree),
3312 or for a library call it is an identifier node for the subroutine name.
3313 SIZE is the number of bytes of arguments passed on the stack.
3315 On the 80386, the RTD insn may be used to pop them if the number
3316 of args is fixed, but if the number is variable then the caller
3317 must pop them all. RTD can't be used for library calls now
3318 because the library is compiled with the Unix compiler.
3319 Use of RTD is a selectable option, since it is incompatible with
3320 standard Unix calling sequences. If the option is not selected,
3321 the caller must always pop the args.
3323 The attribute stdcall is equivalent to RTD on a per module basis. */
3326 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3330 /* None of the 64-bit ABIs pop arguments. */
3334 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3336 /* Cdecl functions override -mrtd, and never pop the stack. */
3337 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3339 /* Stdcall and fastcall functions will pop the stack if not
3341 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3342 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3345 if (rtd && ! stdarg_p (funtype))
3349 /* Lose any fake structure return argument if it is passed on the stack. */
3350 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3351 && !KEEP_AGGREGATE_RETURN_POINTER)
3353 int nregs = ix86_function_regparm (funtype, fundecl);
3355 return GET_MODE_SIZE (Pmode);
3361 /* Argument support functions. */
3363 /* Return true when register may be used to pass function parameters. */
3365 ix86_function_arg_regno_p (int regno)
3368 const int *parm_regs;
3373 return (regno < REGPARM_MAX
3374 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3376 return (regno < REGPARM_MAX
3377 || (TARGET_MMX && MMX_REGNO_P (regno)
3378 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3379 || (TARGET_SSE && SSE_REGNO_P (regno)
3380 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3385 if (SSE_REGNO_P (regno) && TARGET_SSE)
3390 if (TARGET_SSE && SSE_REGNO_P (regno)
3391 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3395 /* RAX is used as hidden argument to va_arg functions. */
3396 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
3399 if (TARGET_64BIT_MS_ABI)
3400 parm_regs = x86_64_ms_abi_int_parameter_registers;
3402 parm_regs = x86_64_int_parameter_registers;
3403 for (i = 0; i < REGPARM_MAX; i++)
3404 if (regno == parm_regs[i])
3409 /* Return if we do not know how to pass TYPE solely in registers. */
3412 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3414 if (must_pass_in_stack_var_size_or_pad (mode, type))
3417 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3418 The layout_type routine is crafty and tries to trick us into passing
3419 currently unsupported vector types on the stack by using TImode. */
3420 return (!TARGET_64BIT && mode == TImode
3421 && type && TREE_CODE (type) != VECTOR_TYPE);
3424 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3425 for a call to a function whose data type is FNTYPE.
3426 For a library call, FNTYPE is 0. */
3429 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3430 tree fntype, /* tree ptr for function decl */
3431 rtx libname, /* SYMBOL_REF of library name or 0 */
3434 memset (cum, 0, sizeof (*cum));
3436 /* Set up the number of registers to use for passing arguments. */
3437 cum->nregs = ix86_regparm;
3439 cum->sse_nregs = SSE_REGPARM_MAX;
3441 cum->mmx_nregs = MMX_REGPARM_MAX;
3442 cum->warn_sse = true;
3443 cum->warn_mmx = true;
3444 cum->maybe_vaarg = (fntype
3445 ? (!prototype_p (fntype) || stdarg_p (fntype))
3450 /* If there are variable arguments, then we won't pass anything
3451 in registers in 32-bit mode. */
3452 if (cum->maybe_vaarg)
3462 /* Use ecx and edx registers if function has fastcall attribute,
3463 else look for regparm information. */
3466 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3472 cum->nregs = ix86_function_regparm (fntype, fndecl);
3475 /* Set up the number of SSE registers used for passing SFmode
3476 and DFmode arguments. Warn for mismatching ABI. */
3477 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3481 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3482 But in the case of vector types, it is some vector mode.
3484 When we have only some of our vector isa extensions enabled, then there
3485 are some modes for which vector_mode_supported_p is false. For these
3486 modes, the generic vector support in gcc will choose some non-vector mode
3487 in order to implement the type. By computing the natural mode, we'll
3488 select the proper ABI location for the operand and not depend on whatever
3489 the middle-end decides to do with these vector types. */
3491 static enum machine_mode
3492 type_natural_mode (const_tree type)
3494 enum machine_mode mode = TYPE_MODE (type);
3496 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3498 HOST_WIDE_INT size = int_size_in_bytes (type);
3499 if ((size == 8 || size == 16)
3500 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3501 && TYPE_VECTOR_SUBPARTS (type) > 1)
3503 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3505 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3506 mode = MIN_MODE_VECTOR_FLOAT;
3508 mode = MIN_MODE_VECTOR_INT;
3510 /* Get the mode which has this inner mode and number of units. */
3511 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3512 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3513 && GET_MODE_INNER (mode) == innermode)
3523 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3524 this may not agree with the mode that the type system has chosen for the
3525 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3526 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3529 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3534 if (orig_mode != BLKmode)
3535 tmp = gen_rtx_REG (orig_mode, regno);
3538 tmp = gen_rtx_REG (mode, regno);
3539 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3540 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3546 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3547 of this code is to classify each 8bytes of incoming argument by the register
3548 class and assign registers accordingly. */
3550 /* Return the union class of CLASS1 and CLASS2.
3551 See the x86-64 PS ABI for details. */
3553 static enum x86_64_reg_class
3554 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3556 /* Rule #1: If both classes are equal, this is the resulting class. */
3557 if (class1 == class2)
3560 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3562 if (class1 == X86_64_NO_CLASS)
3564 if (class2 == X86_64_NO_CLASS)
3567 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3568 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3569 return X86_64_MEMORY_CLASS;
3571 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3572 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3573 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3574 return X86_64_INTEGERSI_CLASS;
3575 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3576 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3577 return X86_64_INTEGER_CLASS;
3579 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3581 if (class1 == X86_64_X87_CLASS
3582 || class1 == X86_64_X87UP_CLASS
3583 || class1 == X86_64_COMPLEX_X87_CLASS
3584 || class2 == X86_64_X87_CLASS
3585 || class2 == X86_64_X87UP_CLASS
3586 || class2 == X86_64_COMPLEX_X87_CLASS)
3587 return X86_64_MEMORY_CLASS;
3589 /* Rule #6: Otherwise class SSE is used. */
3590 return X86_64_SSE_CLASS;
3593 /* Classify the argument of type TYPE and mode MODE.
3594 CLASSES will be filled by the register class used to pass each word
3595 of the operand. The number of words is returned. In case the parameter
3596 should be passed in memory, 0 is returned. As a special case for zero
3597 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3599 BIT_OFFSET is used internally for handling records and specifies offset
3600 of the offset in bits modulo 256 to avoid overflow cases.
3602 See the x86-64 PS ABI for details.
3606 classify_argument (enum machine_mode mode, const_tree type,
3607 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3609 HOST_WIDE_INT bytes =
3610 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3611 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3613 /* Variable sized entities are always passed/returned in memory. */
3617 if (mode != VOIDmode
3618 && targetm.calls.must_pass_in_stack (mode, type))
3621 if (type && AGGREGATE_TYPE_P (type))
3625 enum x86_64_reg_class subclasses[MAX_CLASSES];
3627 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3631 for (i = 0; i < words; i++)
3632 classes[i] = X86_64_NO_CLASS;
3634 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3635 signalize memory class, so handle it as special case. */
3638 classes[0] = X86_64_NO_CLASS;
3642 /* Classify each field of record and merge classes. */
3643 switch (TREE_CODE (type))
3646 /* And now merge the fields of structure. */
3647 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3649 if (TREE_CODE (field) == FIELD_DECL)
3653 if (TREE_TYPE (field) == error_mark_node)
3656 /* Bitfields are always classified as integer. Handle them
3657 early, since later code would consider them to be
3658 misaligned integers. */
3659 if (DECL_BIT_FIELD (field))
3661 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3662 i < ((int_bit_position (field) + (bit_offset % 64))
3663 + tree_low_cst (DECL_SIZE (field), 0)
3666 merge_classes (X86_64_INTEGER_CLASS,
3671 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3672 TREE_TYPE (field), subclasses,
3673 (int_bit_position (field)
3674 + bit_offset) % 256);
3677 for (i = 0; i < num; i++)
3680 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3682 merge_classes (subclasses[i], classes[i + pos]);
3690 /* Arrays are handled as small records. */
3693 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3694 TREE_TYPE (type), subclasses, bit_offset);
3698 /* The partial classes are now full classes. */
3699 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3700 subclasses[0] = X86_64_SSE_CLASS;
3701 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3702 subclasses[0] = X86_64_INTEGER_CLASS;
3704 for (i = 0; i < words; i++)
3705 classes[i] = subclasses[i % num];
3710 case QUAL_UNION_TYPE:
3711 /* Unions are similar to RECORD_TYPE but offset is always 0.
3713 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3715 if (TREE_CODE (field) == FIELD_DECL)
3719 if (TREE_TYPE (field) == error_mark_node)
3722 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3723 TREE_TYPE (field), subclasses,
3727 for (i = 0; i < num; i++)
3728 classes[i] = merge_classes (subclasses[i], classes[i]);
3737 /* Final merger cleanup. */
3738 for (i = 0; i < words; i++)
3740 /* If one class is MEMORY, everything should be passed in
3742 if (classes[i] == X86_64_MEMORY_CLASS)
3745 /* The X86_64_SSEUP_CLASS should be always preceded by
3746 X86_64_SSE_CLASS. */
3747 if (classes[i] == X86_64_SSEUP_CLASS
3748 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3749 classes[i] = X86_64_SSE_CLASS;
3751 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3752 if (classes[i] == X86_64_X87UP_CLASS
3753 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3754 classes[i] = X86_64_SSE_CLASS;
3759 /* Compute alignment needed. We align all types to natural boundaries with
3760 exception of XFmode that is aligned to 64bits. */
3761 if (mode != VOIDmode && mode != BLKmode)
3763 int mode_alignment = GET_MODE_BITSIZE (mode);
3766 mode_alignment = 128;
3767 else if (mode == XCmode)
3768 mode_alignment = 256;
3769 if (COMPLEX_MODE_P (mode))
3770 mode_alignment /= 2;
3771 /* Misaligned fields are always returned in memory. */
3772 if (bit_offset % mode_alignment)
3776 /* for V1xx modes, just use the base mode */
3777 if (VECTOR_MODE_P (mode)
3778 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3779 mode = GET_MODE_INNER (mode);
3781 /* Classification of atomic types. */
3786 classes[0] = X86_64_SSE_CLASS;
3789 classes[0] = X86_64_SSE_CLASS;
3790 classes[1] = X86_64_SSEUP_CLASS;
3799 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3800 classes[0] = X86_64_INTEGERSI_CLASS;
3802 classes[0] = X86_64_INTEGER_CLASS;
3806 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3811 if (!(bit_offset % 64))
3812 classes[0] = X86_64_SSESF_CLASS;
3814 classes[0] = X86_64_SSE_CLASS;
3817 classes[0] = X86_64_SSEDF_CLASS;
3820 classes[0] = X86_64_X87_CLASS;
3821 classes[1] = X86_64_X87UP_CLASS;
3824 classes[0] = X86_64_SSE_CLASS;
3825 classes[1] = X86_64_SSEUP_CLASS;
3828 classes[0] = X86_64_SSE_CLASS;
3831 classes[0] = X86_64_SSEDF_CLASS;
3832 classes[1] = X86_64_SSEDF_CLASS;
3835 classes[0] = X86_64_COMPLEX_X87_CLASS;
3838 /* This modes is larger than 16 bytes. */
3846 classes[0] = X86_64_SSE_CLASS;
3847 classes[1] = X86_64_SSEUP_CLASS;
3853 classes[0] = X86_64_SSE_CLASS;
3859 gcc_assert (VECTOR_MODE_P (mode));
3864 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3866 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3867 classes[0] = X86_64_INTEGERSI_CLASS;
3869 classes[0] = X86_64_INTEGER_CLASS;
3870 classes[1] = X86_64_INTEGER_CLASS;
3871 return 1 + (bytes > 8);
3875 /* Examine the argument and return set number of register required in each
3876 class. Return 0 iff parameter should be passed in memory. */
3878 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3879 int *int_nregs, int *sse_nregs)
3881 enum x86_64_reg_class regclass[MAX_CLASSES];
3882 int n = classify_argument (mode, type, regclass, 0);
3888 for (n--; n >= 0; n--)
3889 switch (regclass[n])
3891 case X86_64_INTEGER_CLASS:
3892 case X86_64_INTEGERSI_CLASS:
3895 case X86_64_SSE_CLASS:
3896 case X86_64_SSESF_CLASS:
3897 case X86_64_SSEDF_CLASS:
3900 case X86_64_NO_CLASS:
3901 case X86_64_SSEUP_CLASS:
3903 case X86_64_X87_CLASS:
3904 case X86_64_X87UP_CLASS:
3908 case X86_64_COMPLEX_X87_CLASS:
3909 return in_return ? 2 : 0;
3910 case X86_64_MEMORY_CLASS:
3916 /* Construct container for the argument used by GCC interface. See
3917 FUNCTION_ARG for the detailed description. */
3920 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3921 const_tree type, int in_return, int nintregs, int nsseregs,
3922 const int *intreg, int sse_regno)
3924 /* The following variables hold the static issued_error state. */
3925 static bool issued_sse_arg_error;
3926 static bool issued_sse_ret_error;
3927 static bool issued_x87_ret_error;
3929 enum machine_mode tmpmode;
3931 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3932 enum x86_64_reg_class regclass[MAX_CLASSES];
3936 int needed_sseregs, needed_intregs;
3937 rtx exp[MAX_CLASSES];
3940 n = classify_argument (mode, type, regclass, 0);
3943 if (!examine_argument (mode, type, in_return, &needed_intregs,
3946 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3949 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3950 some less clueful developer tries to use floating-point anyway. */
3951 if (needed_sseregs && !TARGET_SSE)
3955 if (!issued_sse_ret_error)
3957 error ("SSE register return with SSE disabled");
3958 issued_sse_ret_error = true;
3961 else if (!issued_sse_arg_error)
3963 error ("SSE register argument with SSE disabled");
3964 issued_sse_arg_error = true;
3969 /* Likewise, error if the ABI requires us to return values in the
3970 x87 registers and the user specified -mno-80387. */
3971 if (!TARGET_80387 && in_return)
3972 for (i = 0; i < n; i++)
3973 if (regclass[i] == X86_64_X87_CLASS
3974 || regclass[i] == X86_64_X87UP_CLASS
3975 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3977 if (!issued_x87_ret_error)
3979 error ("x87 register return with x87 disabled");
3980 issued_x87_ret_error = true;
3985 /* First construct simple cases. Avoid SCmode, since we want to use
3986 single register to pass this type. */
3987 if (n == 1 && mode != SCmode)
3988 switch (regclass[0])
3990 case X86_64_INTEGER_CLASS:
3991 case X86_64_INTEGERSI_CLASS:
3992 return gen_rtx_REG (mode, intreg[0]);
3993 case X86_64_SSE_CLASS:
3994 case X86_64_SSESF_CLASS:
3995 case X86_64_SSEDF_CLASS:
3996 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3997 case X86_64_X87_CLASS:
3998 case X86_64_COMPLEX_X87_CLASS:
3999 return gen_rtx_REG (mode, FIRST_STACK_REG);
4000 case X86_64_NO_CLASS:
4001 /* Zero sized array, struct or class. */
4006 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4007 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4008 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4011 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4012 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4013 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4014 && regclass[1] == X86_64_INTEGER_CLASS
4015 && (mode == CDImode || mode == TImode || mode == TFmode)
4016 && intreg[0] + 1 == intreg[1])
4017 return gen_rtx_REG (mode, intreg[0]);
4019 /* Otherwise figure out the entries of the PARALLEL. */
4020 for (i = 0; i < n; i++)
4022 switch (regclass[i])
4024 case X86_64_NO_CLASS:
4026 case X86_64_INTEGER_CLASS:
4027 case X86_64_INTEGERSI_CLASS:
4028 /* Merge TImodes on aligned occasions here too. */
4029 if (i * 8 + 8 > bytes)
4030 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4031 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4035 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4036 if (tmpmode == BLKmode)
4038 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4039 gen_rtx_REG (tmpmode, *intreg),
4043 case X86_64_SSESF_CLASS:
4044 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4045 gen_rtx_REG (SFmode,
4046 SSE_REGNO (sse_regno)),
4050 case X86_64_SSEDF_CLASS:
4051 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4052 gen_rtx_REG (DFmode,
4053 SSE_REGNO (sse_regno)),
4057 case X86_64_SSE_CLASS:
4058 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4062 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4063 gen_rtx_REG (tmpmode,
4064 SSE_REGNO (sse_regno)),
4066 if (tmpmode == TImode)
4075 /* Empty aligned struct, union or class. */
4079 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4080 for (i = 0; i < nexps; i++)
4081 XVECEXP (ret, 0, i) = exp [i];
4085 /* Update the data in CUM to advance over an argument of mode MODE
4086 and data type TYPE. (TYPE is null for libcalls where that information
4087 may not be available.) */
4090 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4091 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4107 cum->words += words;
4108 cum->nregs -= words;
4109 cum->regno += words;
4111 if (cum->nregs <= 0)
4119 if (cum->float_in_sse < 2)
4122 if (cum->float_in_sse < 1)
4133 if (!type || !AGGREGATE_TYPE_P (type))
4135 cum->sse_words += words;
4136 cum->sse_nregs -= 1;
4137 cum->sse_regno += 1;
4138 if (cum->sse_nregs <= 0)
4150 if (!type || !AGGREGATE_TYPE_P (type))
4152 cum->mmx_words += words;
4153 cum->mmx_nregs -= 1;
4154 cum->mmx_regno += 1;
4155 if (cum->mmx_nregs <= 0)
4166 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4167 tree type, HOST_WIDE_INT words)
4169 int int_nregs, sse_nregs;
4171 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4172 cum->words += words;
4173 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4175 cum->nregs -= int_nregs;
4176 cum->sse_nregs -= sse_nregs;
4177 cum->regno += int_nregs;
4178 cum->sse_regno += sse_nregs;
4181 cum->words += words;
4185 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4186 HOST_WIDE_INT words)
4188 /* Otherwise, this should be passed indirect. */
4189 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4191 cum->words += words;
4200 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4201 tree type, int named ATTRIBUTE_UNUSED)
4203 HOST_WIDE_INT bytes, words;
4205 if (mode == BLKmode)
4206 bytes = int_size_in_bytes (type);
4208 bytes = GET_MODE_SIZE (mode);
4209 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4212 mode = type_natural_mode (type);
4214 if (TARGET_64BIT_MS_ABI)
4215 function_arg_advance_ms_64 (cum, bytes, words);
4216 else if (TARGET_64BIT)
4217 function_arg_advance_64 (cum, mode, type, words);
4219 function_arg_advance_32 (cum, mode, type, bytes, words);
4222 /* Define where to put the arguments to a function.
4223 Value is zero to push the argument on the stack,
4224 or a hard register in which to store the argument.
4226 MODE is the argument's machine mode.
4227 TYPE is the data type of the argument (as a tree).
4228 This is null for libcalls where that information may
4230 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4231 the preceding args and about the function being called.
4232 NAMED is nonzero if this argument is a named parameter
4233 (otherwise it is an extra parameter matching an ellipsis). */
4236 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4237 enum machine_mode orig_mode, tree type,
4238 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4240 static bool warnedsse, warnedmmx;
4242 /* Avoid the AL settings for the Unix64 ABI. */
4243 if (mode == VOIDmode)
4259 if (words <= cum->nregs)
4261 int regno = cum->regno;
4263 /* Fastcall allocates the first two DWORD (SImode) or
4264 smaller arguments to ECX and EDX if it isn't an
4270 || (type && AGGREGATE_TYPE_P (type)))
4273 /* ECX not EAX is the first allocated register. */
4274 if (regno == AX_REG)
4277 return gen_rtx_REG (mode, regno);
4282 if (cum->float_in_sse < 2)
4285 if (cum->float_in_sse < 1)
4295 if (!type || !AGGREGATE_TYPE_P (type))
4297 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4300 warning (0, "SSE vector argument without SSE enabled "
4304 return gen_reg_or_parallel (mode, orig_mode,
4305 cum->sse_regno + FIRST_SSE_REG);
4313 if (!type || !AGGREGATE_TYPE_P (type))
4315 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4318 warning (0, "MMX vector argument without MMX enabled "
4322 return gen_reg_or_parallel (mode, orig_mode,
4323 cum->mmx_regno + FIRST_MMX_REG);
4332 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4333 enum machine_mode orig_mode, tree type)
4335 /* Handle a hidden AL argument containing number of registers
4336 for varargs x86-64 functions. */
4337 if (mode == VOIDmode)
4338 return GEN_INT (cum->maybe_vaarg
4339 ? (cum->sse_nregs < 0
4344 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4346 &x86_64_int_parameter_registers [cum->regno],
4351 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4352 enum machine_mode orig_mode, int named)
4356 /* Avoid the AL settings for the Unix64 ABI. */
4357 if (mode == VOIDmode)
4360 /* If we've run out of registers, it goes on the stack. */
4361 if (cum->nregs == 0)
4364 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4366 /* Only floating point modes are passed in anything but integer regs. */
4367 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4370 regno = cum->regno + FIRST_SSE_REG;
4375 /* Unnamed floating parameters are passed in both the
4376 SSE and integer registers. */
4377 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4378 t2 = gen_rtx_REG (mode, regno);
4379 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4380 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4381 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4385 return gen_reg_or_parallel (mode, orig_mode, regno);
4389 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4390 tree type, int named)
4392 enum machine_mode mode = omode;
4393 HOST_WIDE_INT bytes, words;
4395 if (mode == BLKmode)
4396 bytes = int_size_in_bytes (type);
4398 bytes = GET_MODE_SIZE (mode);
4399 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4401 /* To simplify the code below, represent vector types with a vector mode
4402 even if MMX/SSE are not active. */
4403 if (type && TREE_CODE (type) == VECTOR_TYPE)
4404 mode = type_natural_mode (type);
4406 if (TARGET_64BIT_MS_ABI)
4407 return function_arg_ms_64 (cum, mode, omode, named);
4408 else if (TARGET_64BIT)
4409 return function_arg_64 (cum, mode, omode, type);
4411 return function_arg_32 (cum, mode, omode, type, bytes, words);
4414 /* A C expression that indicates when an argument must be passed by
4415 reference. If nonzero for an argument, a copy of that argument is
4416 made in memory and a pointer to the argument is passed instead of
4417 the argument itself. The pointer is passed in whatever way is
4418 appropriate for passing a pointer to that type. */
4421 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4422 enum machine_mode mode ATTRIBUTE_UNUSED,
4423 const_tree type, bool named ATTRIBUTE_UNUSED)
4425 if (TARGET_64BIT_MS_ABI)
4429 /* Arrays are passed by reference. */
4430 if (TREE_CODE (type) == ARRAY_TYPE)
4433 if (AGGREGATE_TYPE_P (type))
4435 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4436 are passed by reference. */
4437 int el2 = exact_log2 (int_size_in_bytes (type));
4438 return !(el2 >= 0 && el2 <= 3);
4442 /* __m128 is passed by reference. */
4443 /* ??? How to handle complex? For now treat them as structs,
4444 and pass them by reference if they're too large. */
4445 if (GET_MODE_SIZE (mode) > 8)
4448 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4454 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4455 ABI. Only called if TARGET_SSE. */
4457 contains_128bit_aligned_vector_p (tree type)
4459 enum machine_mode mode = TYPE_MODE (type);
4460 if (SSE_REG_MODE_P (mode)
4461 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4463 if (TYPE_ALIGN (type) < 128)
4466 if (AGGREGATE_TYPE_P (type))
4468 /* Walk the aggregates recursively. */
4469 switch (TREE_CODE (type))
4473 case QUAL_UNION_TYPE:
4477 /* Walk all the structure fields. */
4478 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4480 if (TREE_CODE (field) == FIELD_DECL
4481 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4488 /* Just for use if some languages passes arrays by value. */
4489 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4500 /* Gives the alignment boundary, in bits, of an argument with the
4501 specified mode and type. */
4504 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4508 align = TYPE_ALIGN (type);
4510 align = GET_MODE_ALIGNMENT (mode);
4511 if (align < PARM_BOUNDARY)
4512 align = PARM_BOUNDARY;
4515 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4516 make an exception for SSE modes since these require 128bit
4519 The handling here differs from field_alignment. ICC aligns MMX
4520 arguments to 4 byte boundaries, while structure fields are aligned
4521 to 8 byte boundaries. */
4523 align = PARM_BOUNDARY;
4526 if (!SSE_REG_MODE_P (mode))
4527 align = PARM_BOUNDARY;
4531 if (!contains_128bit_aligned_vector_p (type))
4532 align = PARM_BOUNDARY;
4540 /* Return true if N is a possible register number of function value. */
4543 ix86_function_value_regno_p (int regno)
4550 case FIRST_FLOAT_REG:
4551 if (TARGET_64BIT_MS_ABI)
4553 return TARGET_FLOAT_RETURNS_IN_80387;
4559 if (TARGET_MACHO || TARGET_64BIT)
4567 /* Define how to find the value returned by a function.
4568 VALTYPE is the data type of the value (as a tree).
4569 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4570 otherwise, FUNC is 0. */
4573 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4574 const_tree fntype, const_tree fn)
4578 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4579 we normally prevent this case when mmx is not available. However
4580 some ABIs may require the result to be returned like DImode. */
4581 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4582 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4584 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4585 we prevent this case when sse is not available. However some ABIs
4586 may require the result to be returned like integer TImode. */
4587 else if (mode == TImode
4588 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4589 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4591 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4592 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4593 regno = FIRST_FLOAT_REG;
4595 /* Most things go in %eax. */
4598 /* Override FP return register with %xmm0 for local functions when
4599 SSE math is enabled or for functions with sseregparm attribute. */
4600 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4602 int sse_level = ix86_function_sseregparm (fntype, fn);
4603 if ((sse_level >= 1 && mode == SFmode)
4604 || (sse_level == 2 && mode == DFmode))
4605 regno = FIRST_SSE_REG;
4608 return gen_rtx_REG (orig_mode, regno);
4612 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4617 /* Handle libcalls, which don't provide a type node. */
4618 if (valtype == NULL)
4630 return gen_rtx_REG (mode, FIRST_SSE_REG);
4633 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4637 return gen_rtx_REG (mode, AX_REG);
4641 ret = construct_container (mode, orig_mode, valtype, 1,
4642 REGPARM_MAX, SSE_REGPARM_MAX,
4643 x86_64_int_return_registers, 0);
4645 /* For zero sized structures, construct_container returns NULL, but we
4646 need to keep rest of compiler happy by returning meaningful value. */
4648 ret = gen_rtx_REG (orig_mode, AX_REG);
4654 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4656 unsigned int regno = AX_REG;
4660 if (mode == SFmode || mode == DFmode)
4661 regno = FIRST_SSE_REG;
4662 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4663 regno = FIRST_SSE_REG;
4666 return gen_rtx_REG (orig_mode, regno);
4670 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4671 enum machine_mode orig_mode, enum machine_mode mode)
4673 const_tree fn, fntype;
4676 if (fntype_or_decl && DECL_P (fntype_or_decl))
4677 fn = fntype_or_decl;
4678 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4680 if (TARGET_64BIT_MS_ABI)
4681 return function_value_ms_64 (orig_mode, mode);
4682 else if (TARGET_64BIT)
4683 return function_value_64 (orig_mode, mode, valtype);
4685 return function_value_32 (orig_mode, mode, fntype, fn);
4689 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4690 bool outgoing ATTRIBUTE_UNUSED)
4692 enum machine_mode mode, orig_mode;
4694 orig_mode = TYPE_MODE (valtype);
4695 mode = type_natural_mode (valtype);
4696 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4700 ix86_libcall_value (enum machine_mode mode)
4702 return ix86_function_value_1 (NULL, NULL, mode, mode);
4705 /* Return true iff type is returned in memory. */
4708 return_in_memory_32 (const_tree type, enum machine_mode mode)
4712 if (mode == BLKmode)
4715 size = int_size_in_bytes (type);
4717 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4720 if (VECTOR_MODE_P (mode) || mode == TImode)
4722 /* User-created vectors small enough to fit in EAX. */
4726 /* MMX/3dNow values are returned in MM0,
4727 except when it doesn't exits. */
4729 return (TARGET_MMX ? 0 : 1);
4731 /* SSE values are returned in XMM0, except when it doesn't exist. */
4733 return (TARGET_SSE ? 0 : 1);
4748 return_in_memory_64 (const_tree type, enum machine_mode mode)
4750 int needed_intregs, needed_sseregs;
4751 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4755 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4757 HOST_WIDE_INT size = int_size_in_bytes (type);
4759 /* __m128 and friends are returned in xmm0. */
4760 if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
4763 /* Otherwise, the size must be exactly in [1248]. But not for complex. */
4764 return (size != 1 && size != 2 && size != 4 && size != 8)
4765 || COMPLEX_MODE_P (mode);
4769 ix86_return_in_memory (const_tree type)
4771 const enum machine_mode mode = type_natural_mode (type);
4773 if (TARGET_64BIT_MS_ABI)
4774 return return_in_memory_ms_64 (type, mode);
4775 else if (TARGET_64BIT)
4776 return return_in_memory_64 (type, mode);
4778 return return_in_memory_32 (type, mode);
4781 /* Return false iff TYPE is returned in memory. This version is used
4782 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4783 but differs notably in that when MMX is available, 8-byte vectors
4784 are returned in memory, rather than in MMX registers. */
4787 ix86_sol10_return_in_memory (const_tree type)
4790 enum machine_mode mode = type_natural_mode (type);
4793 return return_in_memory_64 (type, mode);
4795 if (mode == BLKmode)
4798 size = int_size_in_bytes (type);
4800 if (VECTOR_MODE_P (mode))
4802 /* Return in memory only if MMX registers *are* available. This
4803 seems backwards, but it is consistent with the existing
4810 else if (mode == TImode)
4812 else if (mode == XFmode)
4818 /* When returning SSE vector types, we have a choice of either
4819 (1) being abi incompatible with a -march switch, or
4820 (2) generating an error.
4821 Given no good solution, I think the safest thing is one warning.
4822 The user won't be able to use -Werror, but....
4824 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4825 called in response to actually generating a caller or callee that
4826 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4827 via aggregate_value_p for general type probing from tree-ssa. */
4830 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4832 static bool warnedsse, warnedmmx;
4834 if (!TARGET_64BIT && type)
4836 /* Look at the return type of the function, not the function type. */
4837 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4839 if (!TARGET_SSE && !warnedsse)
4842 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4845 warning (0, "SSE vector return without SSE enabled "
4850 if (!TARGET_MMX && !warnedmmx)
4852 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4855 warning (0, "MMX vector return without MMX enabled "
4865 /* Create the va_list data type. */
4868 ix86_build_builtin_va_list (void)
4870 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4872 /* For i386 we use plain pointer to argument area. */
4873 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4874 return build_pointer_type (char_type_node);
4876 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4877 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4879 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4880 unsigned_type_node);
4881 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4882 unsigned_type_node);
4883 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4885 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4888 va_list_gpr_counter_field = f_gpr;
4889 va_list_fpr_counter_field = f_fpr;
4891 DECL_FIELD_CONTEXT (f_gpr) = record;
4892 DECL_FIELD_CONTEXT (f_fpr) = record;
4893 DECL_FIELD_CONTEXT (f_ovf) = record;
4894 DECL_FIELD_CONTEXT (f_sav) = record;
4896 TREE_CHAIN (record) = type_decl;
4897 TYPE_NAME (record) = type_decl;
4898 TYPE_FIELDS (record) = f_gpr;
4899 TREE_CHAIN (f_gpr) = f_fpr;
4900 TREE_CHAIN (f_fpr) = f_ovf;
4901 TREE_CHAIN (f_ovf) = f_sav;
4903 layout_type (record);
4905 /* The correct type is an array type of one element. */
4906 return build_array_type (record, build_index_type (size_zero_node));
4909 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4912 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4922 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4925 /* Indicate to allocate space on the stack for varargs save area. */
4926 ix86_save_varrargs_registers = 1;
4927 /* We need 16-byte stack alignment to save SSE registers. If user
4928 asked for lower preferred_stack_boundary, lets just hope that he knows
4929 what he is doing and won't varargs SSE values.
4931 We also may end up assuming that only 64bit values are stored in SSE
4932 register let some floating point program work. */
4933 if (ix86_preferred_stack_boundary >= 128)
4934 cfun->stack_alignment_needed = 128;
4936 save_area = frame_pointer_rtx;
4937 set = get_varargs_alias_set ();
4939 for (i = cum->regno;
4941 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4944 mem = gen_rtx_MEM (Pmode,
4945 plus_constant (save_area, i * UNITS_PER_WORD));
4946 MEM_NOTRAP_P (mem) = 1;
4947 set_mem_alias_set (mem, set);
4948 emit_move_insn (mem, gen_rtx_REG (Pmode,
4949 x86_64_int_parameter_registers[i]));
4952 if (cum->sse_nregs && cfun->va_list_fpr_size)
4954 /* Now emit code to save SSE registers. The AX parameter contains number
4955 of SSE parameter registers used to call this function. We use
4956 sse_prologue_save insn template that produces computed jump across
4957 SSE saves. We need some preparation work to get this working. */
4959 label = gen_label_rtx ();
4960 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4962 /* Compute address to jump to :
4963 label - 5*eax + nnamed_sse_arguments*5 */
4964 tmp_reg = gen_reg_rtx (Pmode);
4965 nsse_reg = gen_reg_rtx (Pmode);
4966 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
4967 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4968 gen_rtx_MULT (Pmode, nsse_reg,
4973 gen_rtx_CONST (DImode,
4974 gen_rtx_PLUS (DImode,
4976 GEN_INT (cum->sse_regno * 4))));
4978 emit_move_insn (nsse_reg, label_ref);
4979 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4981 /* Compute address of memory block we save into. We always use pointer
4982 pointing 127 bytes after first byte to store - this is needed to keep
4983 instruction size limited by 4 bytes. */
4984 tmp_reg = gen_reg_rtx (Pmode);
4985 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4986 plus_constant (save_area,
4987 8 * REGPARM_MAX + 127)));
4988 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4989 MEM_NOTRAP_P (mem) = 1;
4990 set_mem_alias_set (mem, set);
4991 set_mem_align (mem, BITS_PER_WORD);
4993 /* And finally do the dirty job! */
4994 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4995 GEN_INT (cum->sse_regno), label));
5000 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5002 alias_set_type set = get_varargs_alias_set ();
5005 for (i = cum->regno; i < REGPARM_MAX; i++)
5009 mem = gen_rtx_MEM (Pmode,
5010 plus_constant (virtual_incoming_args_rtx,
5011 i * UNITS_PER_WORD));
5012 MEM_NOTRAP_P (mem) = 1;
5013 set_mem_alias_set (mem, set);
5015 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5016 emit_move_insn (mem, reg);
5021 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5022 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5025 CUMULATIVE_ARGS next_cum;
5028 /* This argument doesn't appear to be used anymore. Which is good,
5029 because the old code here didn't suppress rtl generation. */
5030 gcc_assert (!no_rtl);
5035 fntype = TREE_TYPE (current_function_decl);
5037 /* For varargs, we do not want to skip the dummy va_dcl argument.
5038 For stdargs, we do want to skip the last named argument. */
5040 if (stdarg_p (fntype))
5041 function_arg_advance (&next_cum, mode, type, 1);
5043 if (TARGET_64BIT_MS_ABI)
5044 setup_incoming_varargs_ms_64 (&next_cum);
5046 setup_incoming_varargs_64 (&next_cum);
5049 /* Implement va_start. */
5052 ix86_va_start (tree valist, rtx nextarg)
5054 HOST_WIDE_INT words, n_gpr, n_fpr;
5055 tree f_gpr, f_fpr, f_ovf, f_sav;
5056 tree gpr, fpr, ovf, sav, t;
5059 /* Only 64bit target needs something special. */
5060 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5062 std_expand_builtin_va_start (valist, nextarg);
5066 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5067 f_fpr = TREE_CHAIN (f_gpr);
5068 f_ovf = TREE_CHAIN (f_fpr);
5069 f_sav = TREE_CHAIN (f_ovf);
5071 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5072 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5073 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5074 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5075 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5077 /* Count number of gp and fp argument registers used. */
5078 words = current_function_args_info.words;
5079 n_gpr = current_function_args_info.regno;
5080 n_fpr = current_function_args_info.sse_regno;
5082 if (cfun->va_list_gpr_size)
5084 type = TREE_TYPE (gpr);
5085 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5086 build_int_cst (type, n_gpr * 8));
5087 TREE_SIDE_EFFECTS (t) = 1;
5088 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5091 if (cfun->va_list_fpr_size)
5093 type = TREE_TYPE (fpr);
5094 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5095 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5096 TREE_SIDE_EFFECTS (t) = 1;
5097 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5100 /* Find the overflow area. */
5101 type = TREE_TYPE (ovf);
5102 t = make_tree (type, virtual_incoming_args_rtx);
5104 t = build2 (POINTER_PLUS_EXPR, type, t,
5105 size_int (words * UNITS_PER_WORD));
5106 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5107 TREE_SIDE_EFFECTS (t) = 1;
5108 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5110 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5112 /* Find the register save area.
5113 Prologue of the function save it right above stack frame. */
5114 type = TREE_TYPE (sav);
5115 t = make_tree (type, frame_pointer_rtx);
5116 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5117 TREE_SIDE_EFFECTS (t) = 1;
5118 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5122 /* Implement va_arg. */
5125 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5127 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5128 tree f_gpr, f_fpr, f_ovf, f_sav;
5129 tree gpr, fpr, ovf, sav, t;
5131 tree lab_false, lab_over = NULL_TREE;
5136 enum machine_mode nat_mode;
5138 /* Only 64bit target needs something special. */
5139 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5140 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5142 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5143 f_fpr = TREE_CHAIN (f_gpr);
5144 f_ovf = TREE_CHAIN (f_fpr);
5145 f_sav = TREE_CHAIN (f_ovf);
5147 valist = build_va_arg_indirect_ref (valist);
5148 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5149 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5150 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5151 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5153 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5155 type = build_pointer_type (type);
5156 size = int_size_in_bytes (type);
5157 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5159 nat_mode = type_natural_mode (type);
5160 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5161 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5163 /* Pull the value out of the saved registers. */
5165 addr = create_tmp_var (ptr_type_node, "addr");
5166 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5170 int needed_intregs, needed_sseregs;
5172 tree int_addr, sse_addr;
5174 lab_false = create_artificial_label ();
5175 lab_over = create_artificial_label ();
5177 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5179 need_temp = (!REG_P (container)
5180 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5181 || TYPE_ALIGN (type) > 128));
5183 /* In case we are passing structure, verify that it is consecutive block
5184 on the register save area. If not we need to do moves. */
5185 if (!need_temp && !REG_P (container))
5187 /* Verify that all registers are strictly consecutive */
5188 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5192 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5194 rtx slot = XVECEXP (container, 0, i);
5195 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5196 || INTVAL (XEXP (slot, 1)) != i * 16)
5204 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5206 rtx slot = XVECEXP (container, 0, i);
5207 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5208 || INTVAL (XEXP (slot, 1)) != i * 8)
5220 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5221 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5222 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5223 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5226 /* First ensure that we fit completely in registers. */
5229 t = build_int_cst (TREE_TYPE (gpr),
5230 (REGPARM_MAX - needed_intregs + 1) * 8);
5231 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5232 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5233 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5234 gimplify_and_add (t, pre_p);
5238 t = build_int_cst (TREE_TYPE (fpr),
5239 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5241 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5242 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5243 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5244 gimplify_and_add (t, pre_p);
5247 /* Compute index to start of area used for integer regs. */
5250 /* int_addr = gpr + sav; */
5251 t = fold_convert (sizetype, gpr);
5252 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5253 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5254 gimplify_and_add (t, pre_p);
5258 /* sse_addr = fpr + sav; */
5259 t = fold_convert (sizetype, fpr);
5260 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5261 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5262 gimplify_and_add (t, pre_p);
5267 tree temp = create_tmp_var (type, "va_arg_tmp");
5270 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5271 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5272 gimplify_and_add (t, pre_p);
5274 for (i = 0; i < XVECLEN (container, 0); i++)
5276 rtx slot = XVECEXP (container, 0, i);
5277 rtx reg = XEXP (slot, 0);
5278 enum machine_mode mode = GET_MODE (reg);
5279 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5280 tree addr_type = build_pointer_type (piece_type);
5283 tree dest_addr, dest;
5285 if (SSE_REGNO_P (REGNO (reg)))
5287 src_addr = sse_addr;
5288 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5292 src_addr = int_addr;
5293 src_offset = REGNO (reg) * 8;
5295 src_addr = fold_convert (addr_type, src_addr);
5296 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5297 size_int (src_offset));
5298 src = build_va_arg_indirect_ref (src_addr);
5300 dest_addr = fold_convert (addr_type, addr);
5301 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5302 size_int (INTVAL (XEXP (slot, 1))));
5303 dest = build_va_arg_indirect_ref (dest_addr);
5305 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5306 gimplify_and_add (t, pre_p);
5312 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5313 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5314 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5315 gimplify_and_add (t, pre_p);
5319 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5320 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5321 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5322 gimplify_and_add (t, pre_p);
5325 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5326 gimplify_and_add (t, pre_p);
5328 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5329 append_to_statement_list (t, pre_p);
5332 /* ... otherwise out of the overflow area. */
5334 /* Care for on-stack alignment if needed. */
5335 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5336 || integer_zerop (TYPE_SIZE (type)))
5340 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5341 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5342 size_int (align - 1));
5343 t = fold_convert (sizetype, t);
5344 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5346 t = fold_convert (TREE_TYPE (ovf), t);
5348 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5350 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5351 gimplify_and_add (t2, pre_p);
5353 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5354 size_int (rsize * UNITS_PER_WORD));
5355 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5356 gimplify_and_add (t, pre_p);
5360 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5361 append_to_statement_list (t, pre_p);
5364 ptrtype = build_pointer_type (type);
5365 addr = fold_convert (ptrtype, addr);
5368 addr = build_va_arg_indirect_ref (addr);
5369 return build_va_arg_indirect_ref (addr);
5372 /* Return nonzero if OPNUM's MEM should be matched
5373 in movabs* patterns. */
5376 ix86_check_movabs (rtx insn, int opnum)
5380 set = PATTERN (insn);
5381 if (GET_CODE (set) == PARALLEL)
5382 set = XVECEXP (set, 0, 0);
5383 gcc_assert (GET_CODE (set) == SET);
5384 mem = XEXP (set, opnum);
5385 while (GET_CODE (mem) == SUBREG)
5386 mem = SUBREG_REG (mem);
5387 gcc_assert (MEM_P (mem));
5388 return (volatile_ok || !MEM_VOLATILE_P (mem));
5391 /* Initialize the table of extra 80387 mathematical constants. */
5394 init_ext_80387_constants (void)
5396 static const char * cst[5] =
5398 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5399 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5400 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5401 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5402 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5406 for (i = 0; i < 5; i++)
5408 real_from_string (&ext_80387_constants_table[i], cst[i]);
5409 /* Ensure each constant is rounded to XFmode precision. */
5410 real_convert (&ext_80387_constants_table[i],
5411 XFmode, &ext_80387_constants_table[i]);
5414 ext_80387_constants_init = 1;
5417 /* Return true if the constant is something that can be loaded with
5418 a special instruction. */
5421 standard_80387_constant_p (rtx x)
5423 enum machine_mode mode = GET_MODE (x);
5427 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5430 if (x == CONST0_RTX (mode))
5432 if (x == CONST1_RTX (mode))
5435 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5437 /* For XFmode constants, try to find a special 80387 instruction when
5438 optimizing for size or on those CPUs that benefit from them. */
5440 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5444 if (! ext_80387_constants_init)
5445 init_ext_80387_constants ();
5447 for (i = 0; i < 5; i++)
5448 if (real_identical (&r, &ext_80387_constants_table[i]))
5452 /* Load of the constant -0.0 or -1.0 will be split as
5453 fldz;fchs or fld1;fchs sequence. */
5454 if (real_isnegzero (&r))
5456 if (real_identical (&r, &dconstm1))
5462 /* Return the opcode of the special instruction to be used to load
5466 standard_80387_constant_opcode (rtx x)
5468 switch (standard_80387_constant_p (x))
5492 /* Return the CONST_DOUBLE representing the 80387 constant that is
5493 loaded by the specified special instruction. The argument IDX
5494 matches the return value from standard_80387_constant_p. */
5497 standard_80387_constant_rtx (int idx)
5501 if (! ext_80387_constants_init)
5502 init_ext_80387_constants ();
5518 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5522 /* Return 1 if mode is a valid mode for sse. */
5524 standard_sse_mode_p (enum machine_mode mode)
5541 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5544 standard_sse_constant_p (rtx x)
5546 enum machine_mode mode = GET_MODE (x);
5548 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5550 if (vector_all_ones_operand (x, mode)
5551 && standard_sse_mode_p (mode))
5552 return TARGET_SSE2 ? 2 : -1;
5557 /* Return the opcode of the special instruction to be used to load
5561 standard_sse_constant_opcode (rtx insn, rtx x)
5563 switch (standard_sse_constant_p (x))
5566 if (get_attr_mode (insn) == MODE_V4SF)
5567 return "xorps\t%0, %0";
5568 else if (get_attr_mode (insn) == MODE_V2DF)
5569 return "xorpd\t%0, %0";
5571 return "pxor\t%0, %0";
5573 return "pcmpeqd\t%0, %0";
5578 /* Returns 1 if OP contains a symbol reference */
5581 symbolic_reference_mentioned_p (rtx op)
5586 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5589 fmt = GET_RTX_FORMAT (GET_CODE (op));
5590 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5596 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5597 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5601 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5608 /* Return 1 if it is appropriate to emit `ret' instructions in the
5609 body of a function. Do this only if the epilogue is simple, needing a
5610 couple of insns. Prior to reloading, we can't tell how many registers
5611 must be saved, so return 0 then. Return 0 if there is no frame
5612 marker to de-allocate. */
5615 ix86_can_use_return_insn_p (void)
5617 struct ix86_frame frame;
5619 if (! reload_completed || frame_pointer_needed)
5622 /* Don't allow more than 32 pop, since that's all we can do
5623 with one instruction. */
5624 if (current_function_pops_args
5625 && current_function_args_size >= 32768)
5628 ix86_compute_frame_layout (&frame);
5629 return frame.to_allocate == 0 && frame.nregs == 0;
5632 /* Value should be nonzero if functions must have frame pointers.
5633 Zero means the frame pointer need not be set up (and parms may
5634 be accessed via the stack pointer) in functions that seem suitable. */
5637 ix86_frame_pointer_required (void)
5639 /* If we accessed previous frames, then the generated code expects
5640 to be able to access the saved ebp value in our frame. */
5641 if (cfun->machine->accesses_prev_frame)
5644 /* Several x86 os'es need a frame pointer for other reasons,
5645 usually pertaining to setjmp. */
5646 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5649 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5650 the frame pointer by default. Turn it back on now if we've not
5651 got a leaf function. */
5652 if (TARGET_OMIT_LEAF_FRAME_POINTER
5653 && (!current_function_is_leaf
5654 || ix86_current_function_calls_tls_descriptor))
5657 if (current_function_profile)
5663 /* Record that the current function accesses previous call frames. */
5666 ix86_setup_frame_addresses (void)
5668 cfun->machine->accesses_prev_frame = 1;
5671 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5672 # define USE_HIDDEN_LINKONCE 1
5674 # define USE_HIDDEN_LINKONCE 0
5677 static int pic_labels_used;
5679 /* Fills in the label name that should be used for a pc thunk for
5680 the given register. */
5683 get_pc_thunk_name (char name[32], unsigned int regno)
5685 gcc_assert (!TARGET_64BIT);
5687 if (USE_HIDDEN_LINKONCE)
5688 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5690 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5694 /* This function generates code for -fpic that loads %ebx with
5695 the return address of the caller and then returns. */
5698 ix86_file_end (void)
5703 for (regno = 0; regno < 8; ++regno)
5707 if (! ((pic_labels_used >> regno) & 1))
5710 get_pc_thunk_name (name, regno);
5715 switch_to_section (darwin_sections[text_coal_section]);
5716 fputs ("\t.weak_definition\t", asm_out_file);
5717 assemble_name (asm_out_file, name);
5718 fputs ("\n\t.private_extern\t", asm_out_file);
5719 assemble_name (asm_out_file, name);
5720 fputs ("\n", asm_out_file);
5721 ASM_OUTPUT_LABEL (asm_out_file, name);
5725 if (USE_HIDDEN_LINKONCE)
5729 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5731 TREE_PUBLIC (decl) = 1;
5732 TREE_STATIC (decl) = 1;
5733 DECL_ONE_ONLY (decl) = 1;
5735 (*targetm.asm_out.unique_section) (decl, 0);
5736 switch_to_section (get_named_section (decl, NULL, 0));
5738 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5739 fputs ("\t.hidden\t", asm_out_file);
5740 assemble_name (asm_out_file, name);
5741 fputc ('\n', asm_out_file);
5742 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5746 switch_to_section (text_section);
5747 ASM_OUTPUT_LABEL (asm_out_file, name);
5750 xops[0] = gen_rtx_REG (SImode, regno);
5751 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5752 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5753 output_asm_insn ("ret", xops);
5756 if (NEED_INDICATE_EXEC_STACK)
5757 file_end_indicate_exec_stack ();
5760 /* Emit code for the SET_GOT patterns. */
5763 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5769 if (TARGET_VXWORKS_RTP && flag_pic)
5771 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5772 xops[2] = gen_rtx_MEM (Pmode,
5773 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5774 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5776 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5777 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5778 an unadorned address. */
5779 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5780 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5781 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5785 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5787 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5789 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5792 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5794 output_asm_insn ("call\t%a2", xops);
5797 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5798 is what will be referenced by the Mach-O PIC subsystem. */
5800 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5803 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5804 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5807 output_asm_insn ("pop{l}\t%0", xops);
5812 get_pc_thunk_name (name, REGNO (dest));
5813 pic_labels_used |= 1 << REGNO (dest);
5815 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5816 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5817 output_asm_insn ("call\t%X2", xops);
5818 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5819 is what will be referenced by the Mach-O PIC subsystem. */
5822 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5824 targetm.asm_out.internal_label (asm_out_file, "L",
5825 CODE_LABEL_NUMBER (label));
5832 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5833 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5835 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5840 /* Generate an "push" pattern for input ARG. */
5845 return gen_rtx_SET (VOIDmode,
5847 gen_rtx_PRE_DEC (Pmode,
5848 stack_pointer_rtx)),
5852 /* Return >= 0 if there is an unused call-clobbered register available
5853 for the entire function. */
5856 ix86_select_alt_pic_regnum (void)
5858 if (current_function_is_leaf && !current_function_profile
5859 && !ix86_current_function_calls_tls_descriptor)
5862 for (i = 2; i >= 0; --i)
5863 if (!df_regs_ever_live_p (i))
5867 return INVALID_REGNUM;
5870 /* Return 1 if we need to save REGNO. */
5872 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5874 if (pic_offset_table_rtx
5875 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5876 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5877 || current_function_profile
5878 || current_function_calls_eh_return
5879 || current_function_uses_const_pool))
5881 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5886 if (current_function_calls_eh_return && maybe_eh_return)
5891 unsigned test = EH_RETURN_DATA_REGNO (i);
5892 if (test == INVALID_REGNUM)
5899 if (cfun->machine->force_align_arg_pointer
5900 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5903 return (df_regs_ever_live_p (regno)
5904 && !call_used_regs[regno]
5905 && !fixed_regs[regno]
5906 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5909 /* Return number of registers to be saved on the stack. */
5912 ix86_nsaved_regs (void)
5917 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5918 if (ix86_save_reg (regno, true))
5923 /* Return the offset between two registers, one to be eliminated, and the other
5924 its replacement, at the start of a routine. */
5927 ix86_initial_elimination_offset (int from, int to)
5929 struct ix86_frame frame;
5930 ix86_compute_frame_layout (&frame);
5932 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5933 return frame.hard_frame_pointer_offset;
5934 else if (from == FRAME_POINTER_REGNUM
5935 && to == HARD_FRAME_POINTER_REGNUM)
5936 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5939 gcc_assert (to == STACK_POINTER_REGNUM);
5941 if (from == ARG_POINTER_REGNUM)
5942 return frame.stack_pointer_offset;
5944 gcc_assert (from == FRAME_POINTER_REGNUM);
5945 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5949 /* Fill structure ix86_frame about frame of currently computed function. */
5952 ix86_compute_frame_layout (struct ix86_frame *frame)
5954 HOST_WIDE_INT total_size;
5955 unsigned int stack_alignment_needed;
5956 HOST_WIDE_INT offset;
5957 unsigned int preferred_alignment;
5958 HOST_WIDE_INT size = get_frame_size ();
5960 frame->nregs = ix86_nsaved_regs ();
5963 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5964 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5966 /* During reload iteration the amount of registers saved can change.
5967 Recompute the value as needed. Do not recompute when amount of registers
5968 didn't change as reload does multiple calls to the function and does not
5969 expect the decision to change within single iteration. */
5971 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5973 int count = frame->nregs;
5975 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5976 /* The fast prologue uses move instead of push to save registers. This
5977 is significantly longer, but also executes faster as modern hardware
5978 can execute the moves in parallel, but can't do that for push/pop.
5980 Be careful about choosing what prologue to emit: When function takes
5981 many instructions to execute we may use slow version as well as in
5982 case function is known to be outside hot spot (this is known with
5983 feedback only). Weight the size of function by number of registers
5984 to save as it is cheap to use one or two push instructions but very
5985 slow to use many of them. */
5987 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5988 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5989 || (flag_branch_probabilities
5990 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5991 cfun->machine->use_fast_prologue_epilogue = false;
5993 cfun->machine->use_fast_prologue_epilogue
5994 = !expensive_function_p (count);
5996 if (TARGET_PROLOGUE_USING_MOVE
5997 && cfun->machine->use_fast_prologue_epilogue)
5998 frame->save_regs_using_mov = true;
6000 frame->save_regs_using_mov = false;
6003 /* Skip return address and saved base pointer. */
6004 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6006 frame->hard_frame_pointer_offset = offset;
6008 /* Do some sanity checking of stack_alignment_needed and
6009 preferred_alignment, since i386 port is the only using those features
6010 that may break easily. */
6012 gcc_assert (!size || stack_alignment_needed);
6013 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6014 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6015 gcc_assert (stack_alignment_needed
6016 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6018 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6019 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6021 /* Register save area */
6022 offset += frame->nregs * UNITS_PER_WORD;
6025 if (ix86_save_varrargs_registers)
6027 offset += X86_64_VARARGS_SIZE;
6028 frame->va_arg_size = X86_64_VARARGS_SIZE;
6031 frame->va_arg_size = 0;
6033 /* Align start of frame for local function. */
6034 frame->padding1 = ((offset + stack_alignment_needed - 1)
6035 & -stack_alignment_needed) - offset;
6037 offset += frame->padding1;
6039 /* Frame pointer points here. */
6040 frame->frame_pointer_offset = offset;
6044 /* Add outgoing arguments area. Can be skipped if we eliminated
6045 all the function calls as dead code.
6046 Skipping is however impossible when function calls alloca. Alloca
6047 expander assumes that last current_function_outgoing_args_size
6048 of stack frame are unused. */
6049 if (ACCUMULATE_OUTGOING_ARGS
6050 && (!current_function_is_leaf || current_function_calls_alloca
6051 || ix86_current_function_calls_tls_descriptor))
6053 offset += current_function_outgoing_args_size;
6054 frame->outgoing_arguments_size = current_function_outgoing_args_size;
6057 frame->outgoing_arguments_size = 0;
6059 /* Align stack boundary. Only needed if we're calling another function
6061 if (!current_function_is_leaf || current_function_calls_alloca
6062 || ix86_current_function_calls_tls_descriptor)
6063 frame->padding2 = ((offset + preferred_alignment - 1)
6064 & -preferred_alignment) - offset;
6066 frame->padding2 = 0;
6068 offset += frame->padding2;
6070 /* We've reached end of stack frame. */
6071 frame->stack_pointer_offset = offset;
6073 /* Size prologue needs to allocate. */
6074 frame->to_allocate =
6075 (size + frame->padding1 + frame->padding2
6076 + frame->outgoing_arguments_size + frame->va_arg_size);
6078 if ((!frame->to_allocate && frame->nregs <= 1)
6079 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6080 frame->save_regs_using_mov = false;
6082 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6083 && current_function_is_leaf
6084 && !ix86_current_function_calls_tls_descriptor)
6086 frame->red_zone_size = frame->to_allocate;
6087 if (frame->save_regs_using_mov)
6088 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6089 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6090 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6093 frame->red_zone_size = 0;
6094 frame->to_allocate -= frame->red_zone_size;
6095 frame->stack_pointer_offset -= frame->red_zone_size;
6097 fprintf (stderr, "\n");
6098 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6099 fprintf (stderr, "size: %ld\n", (long)size);
6100 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6101 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6102 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6103 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6104 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6105 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6106 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6107 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6108 (long)frame->hard_frame_pointer_offset);
6109 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6110 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6111 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6112 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6116 /* Emit code to save registers in the prologue. */
6119 ix86_emit_save_regs (void)
6124 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6125 if (ix86_save_reg (regno, true))
6127 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6128 RTX_FRAME_RELATED_P (insn) = 1;
6132 /* Emit code to save registers using MOV insns. First register
6133 is restored from POINTER + OFFSET. */
6135 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6140 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6141 if (ix86_save_reg (regno, true))
6143 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6145 gen_rtx_REG (Pmode, regno));
6146 RTX_FRAME_RELATED_P (insn) = 1;
6147 offset += UNITS_PER_WORD;
6151 /* Expand prologue or epilogue stack adjustment.
6152 The pattern exist to put a dependency on all ebp-based memory accesses.
6153 STYLE should be negative if instructions should be marked as frame related,
6154 zero if %r11 register is live and cannot be freely used and positive
6158 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6163 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6164 else if (x86_64_immediate_operand (offset, DImode))
6165 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6169 /* r11 is used by indirect sibcall return as well, set before the
6170 epilogue and used after the epilogue. ATM indirect sibcall
6171 shouldn't be used together with huge frame sizes in one
6172 function because of the frame_size check in sibcall.c. */
6174 r11 = gen_rtx_REG (DImode, R11_REG);
6175 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6177 RTX_FRAME_RELATED_P (insn) = 1;
6178 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6182 RTX_FRAME_RELATED_P (insn) = 1;
6185 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6188 ix86_internal_arg_pointer (void)
6190 bool has_force_align_arg_pointer =
6191 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6192 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6193 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6194 && DECL_NAME (current_function_decl)
6195 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6196 && DECL_FILE_SCOPE_P (current_function_decl))
6197 || ix86_force_align_arg_pointer
6198 || has_force_align_arg_pointer)
6200 /* Nested functions can't realign the stack due to a register
6202 if (DECL_CONTEXT (current_function_decl)
6203 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6205 if (ix86_force_align_arg_pointer)
6206 warning (0, "-mstackrealign ignored for nested functions");
6207 if (has_force_align_arg_pointer)
6208 error ("%s not supported for nested functions",
6209 ix86_force_align_arg_pointer_string);
6210 return virtual_incoming_args_rtx;
6212 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6213 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6216 return virtual_incoming_args_rtx;
6219 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6220 This is called from dwarf2out.c to emit call frame instructions
6221 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6223 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6225 rtx unspec = SET_SRC (pattern);
6226 gcc_assert (GET_CODE (unspec) == UNSPEC);
6230 case UNSPEC_REG_SAVE:
6231 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6232 SET_DEST (pattern));
6234 case UNSPEC_DEF_CFA:
6235 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6236 INTVAL (XVECEXP (unspec, 0, 0)));
6243 /* Expand the prologue into a bunch of separate insns. */
6246 ix86_expand_prologue (void)
6250 struct ix86_frame frame;
6251 HOST_WIDE_INT allocate;
6253 ix86_compute_frame_layout (&frame);
6255 if (cfun->machine->force_align_arg_pointer)
6259 /* Grab the argument pointer. */
6260 x = plus_constant (stack_pointer_rtx, 4);
6261 y = cfun->machine->force_align_arg_pointer;
6262 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6263 RTX_FRAME_RELATED_P (insn) = 1;
6265 /* The unwind info consists of two parts: install the fafp as the cfa,
6266 and record the fafp as the "save register" of the stack pointer.
6267 The later is there in order that the unwinder can see where it
6268 should restore the stack pointer across the and insn. */
6269 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6270 x = gen_rtx_SET (VOIDmode, y, x);
6271 RTX_FRAME_RELATED_P (x) = 1;
6272 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6274 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6275 RTX_FRAME_RELATED_P (y) = 1;
6276 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6277 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6278 REG_NOTES (insn) = x;
6280 /* Align the stack. */
6281 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6284 /* And here we cheat like madmen with the unwind info. We force the
6285 cfa register back to sp+4, which is exactly what it was at the
6286 start of the function. Re-pushing the return address results in
6287 the return at the same spot relative to the cfa, and thus is
6288 correct wrt the unwind info. */
6289 x = cfun->machine->force_align_arg_pointer;
6290 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6291 insn = emit_insn (gen_push (x));
6292 RTX_FRAME_RELATED_P (insn) = 1;
6295 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6296 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6297 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6298 REG_NOTES (insn) = x;
6301 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6302 slower on all targets. Also sdb doesn't like it. */
6304 if (frame_pointer_needed)
6306 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6307 RTX_FRAME_RELATED_P (insn) = 1;
6309 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6310 RTX_FRAME_RELATED_P (insn) = 1;
6313 allocate = frame.to_allocate;
6315 if (!frame.save_regs_using_mov)
6316 ix86_emit_save_regs ();
6318 allocate += frame.nregs * UNITS_PER_WORD;
6320 /* When using red zone we may start register saving before allocating
6321 the stack frame saving one cycle of the prologue. However I will
6322 avoid doing this if I am going to have to probe the stack since
6323 at least on x86_64 the stack probe can turn into a call that clobbers
6324 a red zone location */
6325 if (TARGET_RED_ZONE && frame.save_regs_using_mov
6326 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6327 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6328 : stack_pointer_rtx,
6329 -frame.nregs * UNITS_PER_WORD);
6333 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6334 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6335 GEN_INT (-allocate), -1);
6338 /* Only valid for Win32. */
6339 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6343 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6345 if (TARGET_64BIT_MS_ABI)
6348 eax_live = ix86_eax_live_at_start_p ();
6352 emit_insn (gen_push (eax));
6353 allocate -= UNITS_PER_WORD;
6356 emit_move_insn (eax, GEN_INT (allocate));
6359 insn = gen_allocate_stack_worker_64 (eax);
6361 insn = gen_allocate_stack_worker_32 (eax);
6362 insn = emit_insn (insn);
6363 RTX_FRAME_RELATED_P (insn) = 1;
6364 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6365 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6366 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6367 t, REG_NOTES (insn));
6371 if (frame_pointer_needed)
6372 t = plus_constant (hard_frame_pointer_rtx,
6375 - frame.nregs * UNITS_PER_WORD);
6377 t = plus_constant (stack_pointer_rtx, allocate);
6378 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6382 if (frame.save_regs_using_mov
6383 && !(TARGET_RED_ZONE
6384 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6386 if (!frame_pointer_needed || !frame.to_allocate)
6387 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6389 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6390 -frame.nregs * UNITS_PER_WORD);
6393 pic_reg_used = false;
6394 if (pic_offset_table_rtx
6395 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6396 || current_function_profile))
6398 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6400 if (alt_pic_reg_used != INVALID_REGNUM)
6401 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6403 pic_reg_used = true;
6410 if (ix86_cmodel == CM_LARGE_PIC)
6412 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6413 rtx label = gen_label_rtx ();
6415 LABEL_PRESERVE_P (label) = 1;
6416 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6417 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6418 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6419 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6420 pic_offset_table_rtx, tmp_reg));
6423 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6426 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6429 /* Prevent function calls from being scheduled before the call to mcount.
6430 In the pic_reg_used case, make sure that the got load isn't deleted. */
6431 if (current_function_profile)
6434 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6435 emit_insn (gen_blockage ());
6439 /* Emit code to restore saved registers using MOV insns. First register
6440 is restored from POINTER + OFFSET. */
6442 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6443 int maybe_eh_return)
6446 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6448 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6449 if (ix86_save_reg (regno, maybe_eh_return))
6451 /* Ensure that adjust_address won't be forced to produce pointer
6452 out of range allowed by x86-64 instruction set. */
6453 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6457 r11 = gen_rtx_REG (DImode, R11_REG);
6458 emit_move_insn (r11, GEN_INT (offset));
6459 emit_insn (gen_adddi3 (r11, r11, pointer));
6460 base_address = gen_rtx_MEM (Pmode, r11);
6463 emit_move_insn (gen_rtx_REG (Pmode, regno),
6464 adjust_address (base_address, Pmode, offset));
6465 offset += UNITS_PER_WORD;
6469 /* Restore function stack, frame, and registers. */
6472 ix86_expand_epilogue (int style)
6475 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6476 struct ix86_frame frame;
6477 HOST_WIDE_INT offset;
6479 ix86_compute_frame_layout (&frame);
6481 /* Calculate start of saved registers relative to ebp. Special care
6482 must be taken for the normal return case of a function using
6483 eh_return: the eax and edx registers are marked as saved, but not
6484 restored along this path. */
6485 offset = frame.nregs;
6486 if (current_function_calls_eh_return && style != 2)
6488 offset *= -UNITS_PER_WORD;
6490 /* If we're only restoring one register and sp is not valid then
6491 using a move instruction to restore the register since it's
6492 less work than reloading sp and popping the register.
6494 The default code result in stack adjustment using add/lea instruction,
6495 while this code results in LEAVE instruction (or discrete equivalent),
6496 so it is profitable in some other cases as well. Especially when there
6497 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6498 and there is exactly one register to pop. This heuristic may need some
6499 tuning in future. */
6500 if ((!sp_valid && frame.nregs <= 1)
6501 || (TARGET_EPILOGUE_USING_MOVE
6502 && cfun->machine->use_fast_prologue_epilogue
6503 && (frame.nregs > 1 || frame.to_allocate))
6504 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6505 || (frame_pointer_needed && TARGET_USE_LEAVE
6506 && cfun->machine->use_fast_prologue_epilogue
6507 && frame.nregs == 1)
6508 || current_function_calls_eh_return)
6510 /* Restore registers. We can use ebp or esp to address the memory
6511 locations. If both are available, default to ebp, since offsets
6512 are known to be small. Only exception is esp pointing directly to the
6513 end of block of saved registers, where we may simplify addressing
6516 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6517 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6518 frame.to_allocate, style == 2);
6520 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6521 offset, style == 2);
6523 /* eh_return epilogues need %ecx added to the stack pointer. */
6526 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6528 if (frame_pointer_needed)
6530 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6531 tmp = plus_constant (tmp, UNITS_PER_WORD);
6532 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6534 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6535 emit_move_insn (hard_frame_pointer_rtx, tmp);
6537 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6542 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6543 tmp = plus_constant (tmp, (frame.to_allocate
6544 + frame.nregs * UNITS_PER_WORD));
6545 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6548 else if (!frame_pointer_needed)
6549 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6550 GEN_INT (frame.to_allocate
6551 + frame.nregs * UNITS_PER_WORD),
6553 /* If not an i386, mov & pop is faster than "leave". */
6554 else if (TARGET_USE_LEAVE || optimize_size
6555 || !cfun->machine->use_fast_prologue_epilogue)
6556 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6559 pro_epilogue_adjust_stack (stack_pointer_rtx,
6560 hard_frame_pointer_rtx,
6563 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6565 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6570 /* First step is to deallocate the stack frame so that we can
6571 pop the registers. */
6574 gcc_assert (frame_pointer_needed);
6575 pro_epilogue_adjust_stack (stack_pointer_rtx,
6576 hard_frame_pointer_rtx,
6577 GEN_INT (offset), style);
6579 else if (frame.to_allocate)
6580 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6581 GEN_INT (frame.to_allocate), style);
6583 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6584 if (ix86_save_reg (regno, false))
6587 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6589 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6591 if (frame_pointer_needed)
6593 /* Leave results in shorter dependency chains on CPUs that are
6594 able to grok it fast. */
6595 if (TARGET_USE_LEAVE)
6596 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6597 else if (TARGET_64BIT)
6598 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6600 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6604 if (cfun->machine->force_align_arg_pointer)
6606 emit_insn (gen_addsi3 (stack_pointer_rtx,
6607 cfun->machine->force_align_arg_pointer,
6611 /* Sibcall epilogues don't want a return instruction. */
6615 if (current_function_pops_args && current_function_args_size)
6617 rtx popc = GEN_INT (current_function_pops_args);
6619 /* i386 can only pop 64K bytes. If asked to pop more, pop
6620 return address, do explicit add, and jump indirectly to the
6623 if (current_function_pops_args >= 65536)
6625 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6627 /* There is no "pascal" calling convention in any 64bit ABI. */
6628 gcc_assert (!TARGET_64BIT);
6630 emit_insn (gen_popsi1 (ecx));
6631 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6632 emit_jump_insn (gen_return_indirect_internal (ecx));
6635 emit_jump_insn (gen_return_pop_internal (popc));
6638 emit_jump_insn (gen_return_internal ());
6641 /* Reset from the function's potential modifications. */
6644 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6645 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6647 if (pic_offset_table_rtx)
6648 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6650 /* Mach-O doesn't support labels at the end of objects, so if
6651 it looks like we might want one, insert a NOP. */
6653 rtx insn = get_last_insn ();
6656 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6657 insn = PREV_INSN (insn);
6661 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6662 fputs ("\tnop\n", file);
6668 /* Extract the parts of an RTL expression that is a valid memory address
6669 for an instruction. Return 0 if the structure of the address is
6670 grossly off. Return -1 if the address contains ASHIFT, so it is not
6671 strictly valid, but still used for computing length of lea instruction. */
6674 ix86_decompose_address (rtx addr, struct ix86_address *out)
6676 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6677 rtx base_reg, index_reg;
6678 HOST_WIDE_INT scale = 1;
6679 rtx scale_rtx = NULL_RTX;
6681 enum ix86_address_seg seg = SEG_DEFAULT;
6683 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6685 else if (GET_CODE (addr) == PLUS)
6695 addends[n++] = XEXP (op, 1);
6698 while (GET_CODE (op) == PLUS);
6703 for (i = n; i >= 0; --i)
6706 switch (GET_CODE (op))
6711 index = XEXP (op, 0);
6712 scale_rtx = XEXP (op, 1);
6716 if (XINT (op, 1) == UNSPEC_TP
6717 && TARGET_TLS_DIRECT_SEG_REFS
6718 && seg == SEG_DEFAULT)
6719 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6748 else if (GET_CODE (addr) == MULT)
6750 index = XEXP (addr, 0); /* index*scale */
6751 scale_rtx = XEXP (addr, 1);
6753 else if (GET_CODE (addr) == ASHIFT)
6757 /* We're called for lea too, which implements ashift on occasion. */
6758 index = XEXP (addr, 0);
6759 tmp = XEXP (addr, 1);
6760 if (!CONST_INT_P (tmp))
6762 scale = INTVAL (tmp);
6763 if ((unsigned HOST_WIDE_INT) scale > 3)
6769 disp = addr; /* displacement */
6771 /* Extract the integral value of scale. */
6774 if (!CONST_INT_P (scale_rtx))
6776 scale = INTVAL (scale_rtx);
6779 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6780 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6782 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6783 if (base_reg && index_reg && scale == 1
6784 && (index_reg == arg_pointer_rtx
6785 || index_reg == frame_pointer_rtx
6786 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6789 tmp = base, base = index, index = tmp;
6790 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6793 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6794 if ((base_reg == hard_frame_pointer_rtx
6795 || base_reg == frame_pointer_rtx
6796 || base_reg == arg_pointer_rtx) && !disp)
6799 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6800 Avoid this by transforming to [%esi+0]. */
6801 if (TARGET_K6 && !optimize_size
6802 && base_reg && !index_reg && !disp
6804 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6807 /* Special case: encode reg+reg instead of reg*2. */
6808 if (!base && index && scale && scale == 2)
6809 base = index, base_reg = index_reg, scale = 1;
6811 /* Special case: scaling cannot be encoded without base or displacement. */
6812 if (!base && !disp && index && scale != 1)
6824 /* Return cost of the memory address x.
6825 For i386, it is better to use a complex address than let gcc copy
6826 the address into a reg and make a new pseudo. But not if the address
6827 requires to two regs - that would mean more pseudos with longer
6830 ix86_address_cost (rtx x)
6832 struct ix86_address parts;
6834 int ok = ix86_decompose_address (x, &parts);
6838 if (parts.base && GET_CODE (parts.base) == SUBREG)
6839 parts.base = SUBREG_REG (parts.base);
6840 if (parts.index && GET_CODE (parts.index) == SUBREG)
6841 parts.index = SUBREG_REG (parts.index);
6843 /* Attempt to minimize number of registers in the address. */
6845 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6847 && (!REG_P (parts.index)
6848 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6852 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6854 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6855 && parts.base != parts.index)
6858 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6859 since it's predecode logic can't detect the length of instructions
6860 and it degenerates to vector decoded. Increase cost of such
6861 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6862 to split such addresses or even refuse such addresses at all.
6864 Following addressing modes are affected:
6869 The first and last case may be avoidable by explicitly coding the zero in
6870 memory address, but I don't have AMD-K6 machine handy to check this
6874 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6875 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6876 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6882 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6883 this is used for to form addresses to local data when -fPIC is in
6887 darwin_local_data_pic (rtx disp)
6889 if (GET_CODE (disp) == MINUS)
6891 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6892 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6893 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6895 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6896 if (! strcmp (sym_name, "<pic base>"))
6904 /* Determine if a given RTX is a valid constant. We already know this
6905 satisfies CONSTANT_P. */
6908 legitimate_constant_p (rtx x)
6910 switch (GET_CODE (x))
6915 if (GET_CODE (x) == PLUS)
6917 if (!CONST_INT_P (XEXP (x, 1)))
6922 if (TARGET_MACHO && darwin_local_data_pic (x))
6925 /* Only some unspecs are valid as "constants". */
6926 if (GET_CODE (x) == UNSPEC)
6927 switch (XINT (x, 1))
6932 return TARGET_64BIT;
6935 x = XVECEXP (x, 0, 0);
6936 return (GET_CODE (x) == SYMBOL_REF
6937 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6939 x = XVECEXP (x, 0, 0);
6940 return (GET_CODE (x) == SYMBOL_REF
6941 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6946 /* We must have drilled down to a symbol. */
6947 if (GET_CODE (x) == LABEL_REF)
6949 if (GET_CODE (x) != SYMBOL_REF)
6954 /* TLS symbols are never valid. */
6955 if (SYMBOL_REF_TLS_MODEL (x))
6958 /* DLLIMPORT symbols are never valid. */
6959 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6960 && SYMBOL_REF_DLLIMPORT_P (x))
6965 if (GET_MODE (x) == TImode
6966 && x != CONST0_RTX (TImode)
6972 if (x == CONST0_RTX (GET_MODE (x)))
6980 /* Otherwise we handle everything else in the move patterns. */
6984 /* Determine if it's legal to put X into the constant pool. This
6985 is not possible for the address of thread-local symbols, which
6986 is checked above. */
6989 ix86_cannot_force_const_mem (rtx x)
6991 /* We can always put integral constants and vectors in memory. */
6992 switch (GET_CODE (x))
7002 return !legitimate_constant_p (x);
7005 /* Determine if a given RTX is a valid constant address. */
7008 constant_address_p (rtx x)
7010 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7013 /* Nonzero if the constant value X is a legitimate general operand
7014 when generating PIC code. It is given that flag_pic is on and
7015 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7018 legitimate_pic_operand_p (rtx x)
7022 switch (GET_CODE (x))
7025 inner = XEXP (x, 0);
7026 if (GET_CODE (inner) == PLUS
7027 && CONST_INT_P (XEXP (inner, 1)))
7028 inner = XEXP (inner, 0);
7030 /* Only some unspecs are valid as "constants". */
7031 if (GET_CODE (inner) == UNSPEC)
7032 switch (XINT (inner, 1))
7037 return TARGET_64BIT;
7039 x = XVECEXP (inner, 0, 0);
7040 return (GET_CODE (x) == SYMBOL_REF
7041 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7049 return legitimate_pic_address_disp_p (x);
7056 /* Determine if a given CONST RTX is a valid memory displacement
7060 legitimate_pic_address_disp_p (rtx disp)
7064 /* In 64bit mode we can allow direct addresses of symbols and labels
7065 when they are not dynamic symbols. */
7068 rtx op0 = disp, op1;
7070 switch (GET_CODE (disp))
7076 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7078 op0 = XEXP (XEXP (disp, 0), 0);
7079 op1 = XEXP (XEXP (disp, 0), 1);
7080 if (!CONST_INT_P (op1)
7081 || INTVAL (op1) >= 16*1024*1024
7082 || INTVAL (op1) < -16*1024*1024)
7084 if (GET_CODE (op0) == LABEL_REF)
7086 if (GET_CODE (op0) != SYMBOL_REF)
7091 /* TLS references should always be enclosed in UNSPEC. */
7092 if (SYMBOL_REF_TLS_MODEL (op0))
7094 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7095 && ix86_cmodel != CM_LARGE_PIC)
7103 if (GET_CODE (disp) != CONST)
7105 disp = XEXP (disp, 0);
7109 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7110 of GOT tables. We should not need these anyway. */
7111 if (GET_CODE (disp) != UNSPEC
7112 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7113 && XINT (disp, 1) != UNSPEC_GOTOFF
7114 && XINT (disp, 1) != UNSPEC_PLTOFF))
7117 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7118 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7124 if (GET_CODE (disp) == PLUS)
7126 if (!CONST_INT_P (XEXP (disp, 1)))
7128 disp = XEXP (disp, 0);
7132 if (TARGET_MACHO && darwin_local_data_pic (disp))
7135 if (GET_CODE (disp) != UNSPEC)
7138 switch (XINT (disp, 1))
7143 /* We need to check for both symbols and labels because VxWorks loads
7144 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7146 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7147 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7149 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7150 While ABI specify also 32bit relocation but we don't produce it in
7151 small PIC model at all. */
7152 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7153 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7155 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7157 case UNSPEC_GOTTPOFF:
7158 case UNSPEC_GOTNTPOFF:
7159 case UNSPEC_INDNTPOFF:
7162 disp = XVECEXP (disp, 0, 0);
7163 return (GET_CODE (disp) == SYMBOL_REF
7164 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7166 disp = XVECEXP (disp, 0, 0);
7167 return (GET_CODE (disp) == SYMBOL_REF
7168 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7170 disp = XVECEXP (disp, 0, 0);
7171 return (GET_CODE (disp) == SYMBOL_REF
7172 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7178 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7179 memory address for an instruction. The MODE argument is the machine mode
7180 for the MEM expression that wants to use this address.
7182 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7183 convert common non-canonical forms to canonical form so that they will
7187 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7188 rtx addr, int strict)
7190 struct ix86_address parts;
7191 rtx base, index, disp;
7192 HOST_WIDE_INT scale;
7193 const char *reason = NULL;
7194 rtx reason_rtx = NULL_RTX;
7196 if (ix86_decompose_address (addr, &parts) <= 0)
7198 reason = "decomposition failed";
7203 index = parts.index;
7205 scale = parts.scale;
7207 /* Validate base register.
7209 Don't allow SUBREG's that span more than a word here. It can lead to spill
7210 failures when the base is one word out of a two word structure, which is
7211 represented internally as a DImode int. */
7220 else if (GET_CODE (base) == SUBREG
7221 && REG_P (SUBREG_REG (base))
7222 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7224 reg = SUBREG_REG (base);
7227 reason = "base is not a register";
7231 if (GET_MODE (base) != Pmode)
7233 reason = "base is not in Pmode";
7237 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7238 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7240 reason = "base is not valid";
7245 /* Validate index register.
7247 Don't allow SUBREG's that span more than a word here -- same as above. */
7256 else if (GET_CODE (index) == SUBREG
7257 && REG_P (SUBREG_REG (index))
7258 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7260 reg = SUBREG_REG (index);
7263 reason = "index is not a register";
7267 if (GET_MODE (index) != Pmode)
7269 reason = "index is not in Pmode";
7273 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7274 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7276 reason = "index is not valid";
7281 /* Validate scale factor. */
7284 reason_rtx = GEN_INT (scale);
7287 reason = "scale without index";
7291 if (scale != 2 && scale != 4 && scale != 8)
7293 reason = "scale is not a valid multiplier";
7298 /* Validate displacement. */
7303 if (GET_CODE (disp) == CONST
7304 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7305 switch (XINT (XEXP (disp, 0), 1))
7307 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7308 used. While ABI specify also 32bit relocations, we don't produce
7309 them at all and use IP relative instead. */
7312 gcc_assert (flag_pic);
7314 goto is_legitimate_pic;
7315 reason = "64bit address unspec";
7318 case UNSPEC_GOTPCREL:
7319 gcc_assert (flag_pic);
7320 goto is_legitimate_pic;
7322 case UNSPEC_GOTTPOFF:
7323 case UNSPEC_GOTNTPOFF:
7324 case UNSPEC_INDNTPOFF:
7330 reason = "invalid address unspec";
7334 else if (SYMBOLIC_CONST (disp)
7338 && MACHOPIC_INDIRECT
7339 && !machopic_operand_p (disp)
7345 if (TARGET_64BIT && (index || base))
7347 /* foo@dtpoff(%rX) is ok. */
7348 if (GET_CODE (disp) != CONST
7349 || GET_CODE (XEXP (disp, 0)) != PLUS
7350 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7351 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7352 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7353 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7355 reason = "non-constant pic memory reference";
7359 else if (! legitimate_pic_address_disp_p (disp))
7361 reason = "displacement is an invalid pic construct";
7365 /* This code used to verify that a symbolic pic displacement
7366 includes the pic_offset_table_rtx register.
7368 While this is good idea, unfortunately these constructs may
7369 be created by "adds using lea" optimization for incorrect
7378 This code is nonsensical, but results in addressing
7379 GOT table with pic_offset_table_rtx base. We can't
7380 just refuse it easily, since it gets matched by
7381 "addsi3" pattern, that later gets split to lea in the
7382 case output register differs from input. While this
7383 can be handled by separate addsi pattern for this case
7384 that never results in lea, this seems to be easier and
7385 correct fix for crash to disable this test. */
7387 else if (GET_CODE (disp) != LABEL_REF
7388 && !CONST_INT_P (disp)
7389 && (GET_CODE (disp) != CONST
7390 || !legitimate_constant_p (disp))
7391 && (GET_CODE (disp) != SYMBOL_REF
7392 || !legitimate_constant_p (disp)))
7394 reason = "displacement is not constant";
7397 else if (TARGET_64BIT
7398 && !x86_64_immediate_operand (disp, VOIDmode))
7400 reason = "displacement is out of range";
7405 /* Everything looks valid. */
7412 /* Return a unique alias set for the GOT. */
7414 static alias_set_type
7415 ix86_GOT_alias_set (void)
7417 static alias_set_type set = -1;
7419 set = new_alias_set ();
7423 /* Return a legitimate reference for ORIG (an address) using the
7424 register REG. If REG is 0, a new pseudo is generated.
7426 There are two types of references that must be handled:
7428 1. Global data references must load the address from the GOT, via
7429 the PIC reg. An insn is emitted to do this load, and the reg is
7432 2. Static data references, constant pool addresses, and code labels
7433 compute the address as an offset from the GOT, whose base is in
7434 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7435 differentiate them from global data objects. The returned
7436 address is the PIC reg + an unspec constant.
7438 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7439 reg also appears in the address. */
7442 legitimize_pic_address (rtx orig, rtx reg)
7449 if (TARGET_MACHO && !TARGET_64BIT)
7452 reg = gen_reg_rtx (Pmode);
7453 /* Use the generic Mach-O PIC machinery. */
7454 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7458 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7460 else if (TARGET_64BIT
7461 && ix86_cmodel != CM_SMALL_PIC
7462 && gotoff_operand (addr, Pmode))
7465 /* This symbol may be referenced via a displacement from the PIC
7466 base address (@GOTOFF). */
7468 if (reload_in_progress)
7469 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7470 if (GET_CODE (addr) == CONST)
7471 addr = XEXP (addr, 0);
7472 if (GET_CODE (addr) == PLUS)
7474 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7476 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7479 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7480 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7482 tmpreg = gen_reg_rtx (Pmode);
7485 emit_move_insn (tmpreg, new_rtx);
7489 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7490 tmpreg, 1, OPTAB_DIRECT);
7493 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7495 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7497 /* This symbol may be referenced via a displacement from the PIC
7498 base address (@GOTOFF). */
7500 if (reload_in_progress)
7501 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7502 if (GET_CODE (addr) == CONST)
7503 addr = XEXP (addr, 0);
7504 if (GET_CODE (addr) == PLUS)
7506 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7508 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7511 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7512 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7513 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7517 emit_move_insn (reg, new_rtx);
7521 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7522 /* We can't use @GOTOFF for text labels on VxWorks;
7523 see gotoff_operand. */
7524 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7526 /* Given that we've already handled dllimport variables separately
7527 in legitimize_address, and all other variables should satisfy
7528 legitimate_pic_address_disp_p, we should never arrive here. */
7529 gcc_assert (!TARGET_64BIT_MS_ABI);
7531 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7533 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7534 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7535 new_rtx = gen_const_mem (Pmode, new_rtx);
7536 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7539 reg = gen_reg_rtx (Pmode);
7540 /* Use directly gen_movsi, otherwise the address is loaded
7541 into register for CSE. We don't want to CSE this addresses,
7542 instead we CSE addresses from the GOT table, so skip this. */
7543 emit_insn (gen_movsi (reg, new_rtx));
7548 /* This symbol must be referenced via a load from the
7549 Global Offset Table (@GOT). */
7551 if (reload_in_progress)
7552 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7553 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7554 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7556 new_rtx = force_reg (Pmode, new_rtx);
7557 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7558 new_rtx = gen_const_mem (Pmode, new_rtx);
7559 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7562 reg = gen_reg_rtx (Pmode);
7563 emit_move_insn (reg, new_rtx);
7569 if (CONST_INT_P (addr)
7570 && !x86_64_immediate_operand (addr, VOIDmode))
7574 emit_move_insn (reg, addr);
7578 new_rtx = force_reg (Pmode, addr);
7580 else if (GET_CODE (addr) == CONST)
7582 addr = XEXP (addr, 0);
7584 /* We must match stuff we generate before. Assume the only
7585 unspecs that can get here are ours. Not that we could do
7586 anything with them anyway.... */
7587 if (GET_CODE (addr) == UNSPEC
7588 || (GET_CODE (addr) == PLUS
7589 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7591 gcc_assert (GET_CODE (addr) == PLUS);
7593 if (GET_CODE (addr) == PLUS)
7595 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7597 /* Check first to see if this is a constant offset from a @GOTOFF
7598 symbol reference. */
7599 if (gotoff_operand (op0, Pmode)
7600 && CONST_INT_P (op1))
7604 if (reload_in_progress)
7605 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7606 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7608 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7609 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7610 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7614 emit_move_insn (reg, new_rtx);
7620 if (INTVAL (op1) < -16*1024*1024
7621 || INTVAL (op1) >= 16*1024*1024)
7623 if (!x86_64_immediate_operand (op1, Pmode))
7624 op1 = force_reg (Pmode, op1);
7625 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7631 base = legitimize_pic_address (XEXP (addr, 0), reg);
7632 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7633 base == reg ? NULL_RTX : reg);
7635 if (CONST_INT_P (new_rtx))
7636 new_rtx = plus_constant (base, INTVAL (new_rtx));
7639 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7641 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7642 new_rtx = XEXP (new_rtx, 1);
7644 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7652 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7655 get_thread_pointer (int to_reg)
7659 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7663 reg = gen_reg_rtx (Pmode);
7664 insn = gen_rtx_SET (VOIDmode, reg, tp);
7665 insn = emit_insn (insn);
7670 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7671 false if we expect this to be used for a memory address and true if
7672 we expect to load the address into a register. */
7675 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7677 rtx dest, base, off, pic, tp;
7682 case TLS_MODEL_GLOBAL_DYNAMIC:
7683 dest = gen_reg_rtx (Pmode);
7684 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7686 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7688 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7691 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7692 insns = get_insns ();
7695 CONST_OR_PURE_CALL_P (insns) = 1;
7696 emit_libcall_block (insns, dest, rax, x);
7698 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7699 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7701 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7703 if (TARGET_GNU2_TLS)
7705 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7707 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7711 case TLS_MODEL_LOCAL_DYNAMIC:
7712 base = gen_reg_rtx (Pmode);
7713 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7715 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7717 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7720 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7721 insns = get_insns ();
7724 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7725 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7726 CONST_OR_PURE_CALL_P (insns) = 1;
7727 emit_libcall_block (insns, base, rax, note);
7729 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7730 emit_insn (gen_tls_local_dynamic_base_64 (base));
7732 emit_insn (gen_tls_local_dynamic_base_32 (base));
7734 if (TARGET_GNU2_TLS)
7736 rtx x = ix86_tls_module_base ();
7738 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7739 gen_rtx_MINUS (Pmode, x, tp));
7742 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7743 off = gen_rtx_CONST (Pmode, off);
7745 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7747 if (TARGET_GNU2_TLS)
7749 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7751 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7756 case TLS_MODEL_INITIAL_EXEC:
7760 type = UNSPEC_GOTNTPOFF;
7764 if (reload_in_progress)
7765 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7766 pic = pic_offset_table_rtx;
7767 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7769 else if (!TARGET_ANY_GNU_TLS)
7771 pic = gen_reg_rtx (Pmode);
7772 emit_insn (gen_set_got (pic));
7773 type = UNSPEC_GOTTPOFF;
7778 type = UNSPEC_INDNTPOFF;
7781 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7782 off = gen_rtx_CONST (Pmode, off);
7784 off = gen_rtx_PLUS (Pmode, pic, off);
7785 off = gen_const_mem (Pmode, off);
7786 set_mem_alias_set (off, ix86_GOT_alias_set ());
7788 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7790 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7791 off = force_reg (Pmode, off);
7792 return gen_rtx_PLUS (Pmode, base, off);
7796 base = get_thread_pointer (true);
7797 dest = gen_reg_rtx (Pmode);
7798 emit_insn (gen_subsi3 (dest, base, off));
7802 case TLS_MODEL_LOCAL_EXEC:
7803 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7804 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7805 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7806 off = gen_rtx_CONST (Pmode, off);
7808 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7810 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7811 return gen_rtx_PLUS (Pmode, base, off);
7815 base = get_thread_pointer (true);
7816 dest = gen_reg_rtx (Pmode);
7817 emit_insn (gen_subsi3 (dest, base, off));
7828 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7831 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7832 htab_t dllimport_map;
7835 get_dllimport_decl (tree decl)
7837 struct tree_map *h, in;
7841 size_t namelen, prefixlen;
7847 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7849 in.hash = htab_hash_pointer (decl);
7850 in.base.from = decl;
7851 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7852 h = (struct tree_map *) *loc;
7856 *loc = h = GGC_NEW (struct tree_map);
7858 h->base.from = decl;
7859 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7860 DECL_ARTIFICIAL (to) = 1;
7861 DECL_IGNORED_P (to) = 1;
7862 DECL_EXTERNAL (to) = 1;
7863 TREE_READONLY (to) = 1;
7865 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7866 name = targetm.strip_name_encoding (name);
7867 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
7868 namelen = strlen (name);
7869 prefixlen = strlen (prefix);
7870 imp_name = (char *) alloca (namelen + prefixlen + 1);
7871 memcpy (imp_name, prefix, prefixlen);
7872 memcpy (imp_name + prefixlen, name, namelen + 1);
7874 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7875 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7876 SET_SYMBOL_REF_DECL (rtl, to);
7877 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7879 rtl = gen_const_mem (Pmode, rtl);
7880 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7882 SET_DECL_RTL (to, rtl);
7883 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
7888 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7889 true if we require the result be a register. */
7892 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7897 gcc_assert (SYMBOL_REF_DECL (symbol));
7898 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7900 x = DECL_RTL (imp_decl);
7902 x = force_reg (Pmode, x);
7906 /* Try machine-dependent ways of modifying an illegitimate address
7907 to be legitimate. If we find one, return the new, valid address.
7908 This macro is used in only one place: `memory_address' in explow.c.
7910 OLDX is the address as it was before break_out_memory_refs was called.
7911 In some cases it is useful to look at this to decide what needs to be done.
7913 MODE and WIN are passed so that this macro can use
7914 GO_IF_LEGITIMATE_ADDRESS.
7916 It is always safe for this macro to do nothing. It exists to recognize
7917 opportunities to optimize the output.
7919 For the 80386, we handle X+REG by loading X into a register R and
7920 using R+REG. R will go in a general reg and indexing will be used.
7921 However, if REG is a broken-out memory address or multiplication,
7922 nothing needs to be done because REG can certainly go in a general reg.
7924 When -fpic is used, special handling is needed for symbolic references.
7925 See comments by legitimize_pic_address in i386.c for details. */
7928 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7933 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7935 return legitimize_tls_address (x, (enum tls_model) log, false);
7936 if (GET_CODE (x) == CONST
7937 && GET_CODE (XEXP (x, 0)) == PLUS
7938 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7939 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7941 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7942 (enum tls_model) log, false);
7943 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7946 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7948 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7949 return legitimize_dllimport_symbol (x, true);
7950 if (GET_CODE (x) == CONST
7951 && GET_CODE (XEXP (x, 0)) == PLUS
7952 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7953 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7955 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7956 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7960 if (flag_pic && SYMBOLIC_CONST (x))
7961 return legitimize_pic_address (x, 0);
7963 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7964 if (GET_CODE (x) == ASHIFT
7965 && CONST_INT_P (XEXP (x, 1))
7966 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7969 log = INTVAL (XEXP (x, 1));
7970 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7971 GEN_INT (1 << log));
7974 if (GET_CODE (x) == PLUS)
7976 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7978 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7979 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7980 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7983 log = INTVAL (XEXP (XEXP (x, 0), 1));
7984 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7985 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7986 GEN_INT (1 << log));
7989 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7990 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7991 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7994 log = INTVAL (XEXP (XEXP (x, 1), 1));
7995 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7996 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7997 GEN_INT (1 << log));
8000 /* Put multiply first if it isn't already. */
8001 if (GET_CODE (XEXP (x, 1)) == MULT)
8003 rtx tmp = XEXP (x, 0);
8004 XEXP (x, 0) = XEXP (x, 1);
8009 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8010 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8011 created by virtual register instantiation, register elimination, and
8012 similar optimizations. */
8013 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8016 x = gen_rtx_PLUS (Pmode,
8017 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8018 XEXP (XEXP (x, 1), 0)),
8019 XEXP (XEXP (x, 1), 1));
8023 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8024 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8025 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8026 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8027 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8028 && CONSTANT_P (XEXP (x, 1)))
8031 rtx other = NULL_RTX;
8033 if (CONST_INT_P (XEXP (x, 1)))
8035 constant = XEXP (x, 1);
8036 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8038 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8040 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8041 other = XEXP (x, 1);
8049 x = gen_rtx_PLUS (Pmode,
8050 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8051 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8052 plus_constant (other, INTVAL (constant)));
8056 if (changed && legitimate_address_p (mode, x, FALSE))
8059 if (GET_CODE (XEXP (x, 0)) == MULT)
8062 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8065 if (GET_CODE (XEXP (x, 1)) == MULT)
8068 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8072 && REG_P (XEXP (x, 1))
8073 && REG_P (XEXP (x, 0)))
8076 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8079 x = legitimize_pic_address (x, 0);
8082 if (changed && legitimate_address_p (mode, x, FALSE))
8085 if (REG_P (XEXP (x, 0)))
8087 rtx temp = gen_reg_rtx (Pmode);
8088 rtx val = force_operand (XEXP (x, 1), temp);
8090 emit_move_insn (temp, val);
8096 else if (REG_P (XEXP (x, 1)))
8098 rtx temp = gen_reg_rtx (Pmode);
8099 rtx val = force_operand (XEXP (x, 0), temp);
8101 emit_move_insn (temp, val);
8111 /* Print an integer constant expression in assembler syntax. Addition
8112 and subtraction are the only arithmetic that may appear in these
8113 expressions. FILE is the stdio stream to write to, X is the rtx, and
8114 CODE is the operand print code from the output string. */
8117 output_pic_addr_const (FILE *file, rtx x, int code)
8121 switch (GET_CODE (x))
8124 gcc_assert (flag_pic);
8129 if (! TARGET_MACHO || TARGET_64BIT)
8130 output_addr_const (file, x);
8133 const char *name = XSTR (x, 0);
8135 /* Mark the decl as referenced so that cgraph will
8136 output the function. */
8137 if (SYMBOL_REF_DECL (x))
8138 mark_decl_referenced (SYMBOL_REF_DECL (x));
8141 if (MACHOPIC_INDIRECT
8142 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8143 name = machopic_indirection_name (x, /*stub_p=*/true);
8145 assemble_name (file, name);
8147 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8148 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8149 fputs ("@PLT", file);
8156 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8157 assemble_name (asm_out_file, buf);
8161 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8165 /* This used to output parentheses around the expression,
8166 but that does not work on the 386 (either ATT or BSD assembler). */
8167 output_pic_addr_const (file, XEXP (x, 0), code);
8171 if (GET_MODE (x) == VOIDmode)
8173 /* We can use %d if the number is <32 bits and positive. */
8174 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8175 fprintf (file, "0x%lx%08lx",
8176 (unsigned long) CONST_DOUBLE_HIGH (x),
8177 (unsigned long) CONST_DOUBLE_LOW (x));
8179 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8182 /* We can't handle floating point constants;
8183 PRINT_OPERAND must handle them. */
8184 output_operand_lossage ("floating constant misused");
8188 /* Some assemblers need integer constants to appear first. */
8189 if (CONST_INT_P (XEXP (x, 0)))
8191 output_pic_addr_const (file, XEXP (x, 0), code);
8193 output_pic_addr_const (file, XEXP (x, 1), code);
8197 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8198 output_pic_addr_const (file, XEXP (x, 1), code);
8200 output_pic_addr_const (file, XEXP (x, 0), code);
8206 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8207 output_pic_addr_const (file, XEXP (x, 0), code);
8209 output_pic_addr_const (file, XEXP (x, 1), code);
8211 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8215 gcc_assert (XVECLEN (x, 0) == 1);
8216 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8217 switch (XINT (x, 1))
8220 fputs ("@GOT", file);
8223 fputs ("@GOTOFF", file);
8226 fputs ("@PLTOFF", file);
8228 case UNSPEC_GOTPCREL:
8229 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8230 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8232 case UNSPEC_GOTTPOFF:
8233 /* FIXME: This might be @TPOFF in Sun ld too. */
8234 fputs ("@GOTTPOFF", file);
8237 fputs ("@TPOFF", file);
8241 fputs ("@TPOFF", file);
8243 fputs ("@NTPOFF", file);
8246 fputs ("@DTPOFF", file);
8248 case UNSPEC_GOTNTPOFF:
8250 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8251 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8253 fputs ("@GOTNTPOFF", file);
8255 case UNSPEC_INDNTPOFF:
8256 fputs ("@INDNTPOFF", file);
8259 output_operand_lossage ("invalid UNSPEC as operand");
8265 output_operand_lossage ("invalid expression as operand");
8269 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8270 We need to emit DTP-relative relocations. */
8272 static void ATTRIBUTE_UNUSED
8273 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8275 fputs (ASM_LONG, file);
8276 output_addr_const (file, x);
8277 fputs ("@DTPOFF", file);
8283 fputs (", 0", file);
8290 /* In the name of slightly smaller debug output, and to cater to
8291 general assembler lossage, recognize PIC+GOTOFF and turn it back
8292 into a direct symbol reference.
8294 On Darwin, this is necessary to avoid a crash, because Darwin
8295 has a different PIC label for each routine but the DWARF debugging
8296 information is not associated with any particular routine, so it's
8297 necessary to remove references to the PIC label from RTL stored by
8298 the DWARF output code. */
8301 ix86_delegitimize_address (rtx orig_x)
8304 /* reg_addend is NULL or a multiple of some register. */
8305 rtx reg_addend = NULL_RTX;
8306 /* const_addend is NULL or a const_int. */
8307 rtx const_addend = NULL_RTX;
8308 /* This is the result, or NULL. */
8309 rtx result = NULL_RTX;
8316 if (GET_CODE (x) != CONST
8317 || GET_CODE (XEXP (x, 0)) != UNSPEC
8318 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8321 return XVECEXP (XEXP (x, 0), 0, 0);
8324 if (GET_CODE (x) != PLUS
8325 || GET_CODE (XEXP (x, 1)) != CONST)
8328 if (REG_P (XEXP (x, 0))
8329 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8330 /* %ebx + GOT/GOTOFF */
8332 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8334 /* %ebx + %reg * scale + GOT/GOTOFF */
8335 reg_addend = XEXP (x, 0);
8336 if (REG_P (XEXP (reg_addend, 0))
8337 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8338 reg_addend = XEXP (reg_addend, 1);
8339 else if (REG_P (XEXP (reg_addend, 1))
8340 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8341 reg_addend = XEXP (reg_addend, 0);
8344 if (!REG_P (reg_addend)
8345 && GET_CODE (reg_addend) != MULT
8346 && GET_CODE (reg_addend) != ASHIFT)
8352 x = XEXP (XEXP (x, 1), 0);
8353 if (GET_CODE (x) == PLUS
8354 && CONST_INT_P (XEXP (x, 1)))
8356 const_addend = XEXP (x, 1);
8360 if (GET_CODE (x) == UNSPEC
8361 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8362 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8363 result = XVECEXP (x, 0, 0);
8365 if (TARGET_MACHO && darwin_local_data_pic (x)
8367 result = XEXP (x, 0);
8373 result = gen_rtx_PLUS (Pmode, result, const_addend);
8375 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8379 /* If X is a machine specific address (i.e. a symbol or label being
8380 referenced as a displacement from the GOT implemented using an
8381 UNSPEC), then return the base term. Otherwise return X. */
8384 ix86_find_base_term (rtx x)
8390 if (GET_CODE (x) != CONST)
8393 if (GET_CODE (term) == PLUS
8394 && (CONST_INT_P (XEXP (term, 1))
8395 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8396 term = XEXP (term, 0);
8397 if (GET_CODE (term) != UNSPEC
8398 || XINT (term, 1) != UNSPEC_GOTPCREL)
8401 term = XVECEXP (term, 0, 0);
8403 if (GET_CODE (term) != SYMBOL_REF
8404 && GET_CODE (term) != LABEL_REF)
8410 term = ix86_delegitimize_address (x);
8412 if (GET_CODE (term) != SYMBOL_REF
8413 && GET_CODE (term) != LABEL_REF)
8420 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8425 if (mode == CCFPmode || mode == CCFPUmode)
8427 enum rtx_code second_code, bypass_code;
8428 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8429 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8430 code = ix86_fp_compare_code_to_integer (code);
8434 code = reverse_condition (code);
8485 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8489 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8490 Those same assemblers have the same but opposite lossage on cmov. */
8492 suffix = fp ? "nbe" : "a";
8493 else if (mode == CCCmode)
8516 gcc_assert (mode == CCmode || mode == CCCmode);
8538 gcc_assert (mode == CCmode || mode == CCCmode);
8539 suffix = fp ? "nb" : "ae";
8542 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8549 else if (mode == CCCmode)
8550 suffix = fp ? "nb" : "ae";
8555 suffix = fp ? "u" : "p";
8558 suffix = fp ? "nu" : "np";
8563 fputs (suffix, file);
8566 /* Print the name of register X to FILE based on its machine mode and number.
8567 If CODE is 'w', pretend the mode is HImode.
8568 If CODE is 'b', pretend the mode is QImode.
8569 If CODE is 'k', pretend the mode is SImode.
8570 If CODE is 'q', pretend the mode is DImode.
8571 If CODE is 'h', pretend the reg is the 'high' byte register.
8572 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8575 print_reg (rtx x, int code, FILE *file)
8577 gcc_assert (x == pc_rtx
8578 || (REGNO (x) != ARG_POINTER_REGNUM
8579 && REGNO (x) != FRAME_POINTER_REGNUM
8580 && REGNO (x) != FLAGS_REG
8581 && REGNO (x) != FPSR_REG
8582 && REGNO (x) != FPCR_REG));
8584 if (ASSEMBLER_DIALECT == ASM_ATT)
8589 gcc_assert (TARGET_64BIT);
8590 fputs ("rip", file);
8594 if (code == 'w' || MMX_REG_P (x))
8596 else if (code == 'b')
8598 else if (code == 'k')
8600 else if (code == 'q')
8602 else if (code == 'y')
8604 else if (code == 'h')
8607 code = GET_MODE_SIZE (GET_MODE (x));
8609 /* Irritatingly, AMD extended registers use different naming convention
8610 from the normal registers. */
8611 if (REX_INT_REG_P (x))
8613 gcc_assert (TARGET_64BIT);
8617 error ("extended registers have no high halves");
8620 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8623 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8626 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8629 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8632 error ("unsupported operand size for extended register");
8640 if (STACK_TOP_P (x))
8642 fputs ("st(0)", file);
8649 if (! ANY_FP_REG_P (x))
8650 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8655 fputs (hi_reg_name[REGNO (x)], file);
8658 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8660 fputs (qi_reg_name[REGNO (x)], file);
8663 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8665 fputs (qi_high_reg_name[REGNO (x)], file);
8672 /* Locate some local-dynamic symbol still in use by this function
8673 so that we can print its name in some tls_local_dynamic_base
8677 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8681 if (GET_CODE (x) == SYMBOL_REF
8682 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8684 cfun->machine->some_ld_name = XSTR (x, 0);
8692 get_some_local_dynamic_name (void)
8696 if (cfun->machine->some_ld_name)
8697 return cfun->machine->some_ld_name;
8699 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8701 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8702 return cfun->machine->some_ld_name;
8708 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8709 C -- print opcode suffix for set/cmov insn.
8710 c -- like C, but print reversed condition
8711 F,f -- likewise, but for floating-point.
8712 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8714 R -- print the prefix for register names.
8715 z -- print the opcode suffix for the size of the current operand.
8716 * -- print a star (in certain assembler syntax)
8717 A -- print an absolute memory reference.
8718 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8719 s -- print a shift double count, followed by the assemblers argument
8721 b -- print the QImode name of the register for the indicated operand.
8722 %b0 would print %al if operands[0] is reg 0.
8723 w -- likewise, print the HImode name of the register.
8724 k -- likewise, print the SImode name of the register.
8725 q -- likewise, print the DImode name of the register.
8726 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8727 y -- print "st(0)" instead of "st" as a register.
8728 D -- print condition for SSE cmp instruction.
8729 P -- if PIC, print an @PLT suffix.
8730 X -- don't print any sort of PIC '@' suffix for a symbol.
8731 & -- print some in-use local-dynamic symbol name.
8732 H -- print a memory address offset by 8; used for sse high-parts
8733 Y -- print condition for SSE5 com* instruction.
8734 + -- print a branch hint as 'cs' or 'ds' prefix
8735 ; -- print a semicolon (after prefixes due to bug in older gas).
8739 print_operand (FILE *file, rtx x, int code)
8746 if (ASSEMBLER_DIALECT == ASM_ATT)
8751 assemble_name (file, get_some_local_dynamic_name ());
8755 switch (ASSEMBLER_DIALECT)
8762 /* Intel syntax. For absolute addresses, registers should not
8763 be surrounded by braces. */
8767 PRINT_OPERAND (file, x, 0);
8777 PRINT_OPERAND (file, x, 0);
8782 if (ASSEMBLER_DIALECT == ASM_ATT)
8787 if (ASSEMBLER_DIALECT == ASM_ATT)
8792 if (ASSEMBLER_DIALECT == ASM_ATT)
8797 if (ASSEMBLER_DIALECT == ASM_ATT)
8802 if (ASSEMBLER_DIALECT == ASM_ATT)
8807 if (ASSEMBLER_DIALECT == ASM_ATT)
8812 /* 387 opcodes don't get size suffixes if the operands are
8814 if (STACK_REG_P (x))
8817 /* Likewise if using Intel opcodes. */
8818 if (ASSEMBLER_DIALECT == ASM_INTEL)
8821 /* This is the size of op from size of operand. */
8822 switch (GET_MODE_SIZE (GET_MODE (x)))
8831 #ifdef HAVE_GAS_FILDS_FISTS
8841 if (GET_MODE (x) == SFmode)
8856 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8858 #ifdef GAS_MNEMONICS
8884 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8886 PRINT_OPERAND (file, x, 0);
8892 /* Little bit of braindamage here. The SSE compare instructions
8893 does use completely different names for the comparisons that the
8894 fp conditional moves. */
8895 switch (GET_CODE (x))
8910 fputs ("unord", file);
8914 fputs ("neq", file);
8918 fputs ("nlt", file);
8922 fputs ("nle", file);
8925 fputs ("ord", file);
8932 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8933 if (ASSEMBLER_DIALECT == ASM_ATT)
8935 switch (GET_MODE (x))
8937 case HImode: putc ('w', file); break;
8939 case SFmode: putc ('l', file); break;
8941 case DFmode: putc ('q', file); break;
8942 default: gcc_unreachable ();
8949 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8952 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8953 if (ASSEMBLER_DIALECT == ASM_ATT)
8956 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8959 /* Like above, but reverse condition */
8961 /* Check to see if argument to %c is really a constant
8962 and not a condition code which needs to be reversed. */
8963 if (!COMPARISON_P (x))
8965 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8968 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8971 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8972 if (ASSEMBLER_DIALECT == ASM_ATT)
8975 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8979 /* It doesn't actually matter what mode we use here, as we're
8980 only going to use this for printing. */
8981 x = adjust_address_nv (x, DImode, 8);
8988 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8991 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8994 int pred_val = INTVAL (XEXP (x, 0));
8996 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8997 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8999 int taken = pred_val > REG_BR_PROB_BASE / 2;
9000 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9002 /* Emit hints only in the case default branch prediction
9003 heuristics would fail. */
9004 if (taken != cputaken)
9006 /* We use 3e (DS) prefix for taken branches and
9007 2e (CS) prefix for not taken branches. */
9009 fputs ("ds ; ", file);
9011 fputs ("cs ; ", file);
9019 switch (GET_CODE (x))
9022 fputs ("neq", file);
9029 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9033 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9044 fputs ("unord", file);
9047 fputs ("ord", file);
9050 fputs ("ueq", file);
9053 fputs ("nlt", file);
9056 fputs ("nle", file);
9059 fputs ("ule", file);
9062 fputs ("ult", file);
9065 fputs ("une", file);
9074 fputs (" ; ", file);
9081 output_operand_lossage ("invalid operand code '%c'", code);
9086 print_reg (x, code, file);
9090 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9091 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9092 && GET_MODE (x) != BLKmode)
9095 switch (GET_MODE_SIZE (GET_MODE (x)))
9097 case 1: size = "BYTE"; break;
9098 case 2: size = "WORD"; break;
9099 case 4: size = "DWORD"; break;
9100 case 8: size = "QWORD"; break;
9101 case 12: size = "XWORD"; break;
9103 if (GET_MODE (x) == XFmode)
9112 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9115 else if (code == 'w')
9117 else if (code == 'k')
9121 fputs (" PTR ", file);
9125 /* Avoid (%rip) for call operands. */
9126 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9127 && !CONST_INT_P (x))
9128 output_addr_const (file, x);
9129 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9130 output_operand_lossage ("invalid constraints for operand");
9135 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9140 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9141 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9143 if (ASSEMBLER_DIALECT == ASM_ATT)
9145 fprintf (file, "0x%08lx", l);
9148 /* These float cases don't actually occur as immediate operands. */
9149 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9153 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9154 fprintf (file, "%s", dstr);
9157 else if (GET_CODE (x) == CONST_DOUBLE
9158 && GET_MODE (x) == XFmode)
9162 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9163 fprintf (file, "%s", dstr);
9168 /* We have patterns that allow zero sets of memory, for instance.
9169 In 64-bit mode, we should probably support all 8-byte vectors,
9170 since we can in fact encode that into an immediate. */
9171 if (GET_CODE (x) == CONST_VECTOR)
9173 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9179 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9181 if (ASSEMBLER_DIALECT == ASM_ATT)
9184 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9185 || GET_CODE (x) == LABEL_REF)
9187 if (ASSEMBLER_DIALECT == ASM_ATT)
9190 fputs ("OFFSET FLAT:", file);
9193 if (CONST_INT_P (x))
9194 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9196 output_pic_addr_const (file, x, code);
9198 output_addr_const (file, x);
9202 /* Print a memory operand whose address is ADDR. */
9205 print_operand_address (FILE *file, rtx addr)
9207 struct ix86_address parts;
9208 rtx base, index, disp;
9210 int ok = ix86_decompose_address (addr, &parts);
9215 index = parts.index;
9217 scale = parts.scale;
9225 if (ASSEMBLER_DIALECT == ASM_ATT)
9227 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9233 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9234 if (TARGET_64BIT && !base && !index)
9238 if (GET_CODE (disp) == CONST
9239 && GET_CODE (XEXP (disp, 0)) == PLUS
9240 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9241 symbol = XEXP (XEXP (disp, 0), 0);
9243 if (GET_CODE (symbol) == LABEL_REF
9244 || (GET_CODE (symbol) == SYMBOL_REF
9245 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9248 if (!base && !index)
9250 /* Displacement only requires special attention. */
9252 if (CONST_INT_P (disp))
9254 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9255 fputs ("ds:", file);
9256 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9259 output_pic_addr_const (file, disp, 0);
9261 output_addr_const (file, disp);
9265 if (ASSEMBLER_DIALECT == ASM_ATT)
9270 output_pic_addr_const (file, disp, 0);
9271 else if (GET_CODE (disp) == LABEL_REF)
9272 output_asm_label (disp);
9274 output_addr_const (file, disp);
9279 print_reg (base, 0, file);
9283 print_reg (index, 0, file);
9285 fprintf (file, ",%d", scale);
9291 rtx offset = NULL_RTX;
9295 /* Pull out the offset of a symbol; print any symbol itself. */
9296 if (GET_CODE (disp) == CONST
9297 && GET_CODE (XEXP (disp, 0)) == PLUS
9298 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9300 offset = XEXP (XEXP (disp, 0), 1);
9301 disp = gen_rtx_CONST (VOIDmode,
9302 XEXP (XEXP (disp, 0), 0));
9306 output_pic_addr_const (file, disp, 0);
9307 else if (GET_CODE (disp) == LABEL_REF)
9308 output_asm_label (disp);
9309 else if (CONST_INT_P (disp))
9312 output_addr_const (file, disp);
9318 print_reg (base, 0, file);
9321 if (INTVAL (offset) >= 0)
9323 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9327 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9334 print_reg (index, 0, file);
9336 fprintf (file, "*%d", scale);
9344 output_addr_const_extra (FILE *file, rtx x)
9348 if (GET_CODE (x) != UNSPEC)
9351 op = XVECEXP (x, 0, 0);
9352 switch (XINT (x, 1))
9354 case UNSPEC_GOTTPOFF:
9355 output_addr_const (file, op);
9356 /* FIXME: This might be @TPOFF in Sun ld. */
9357 fputs ("@GOTTPOFF", file);
9360 output_addr_const (file, op);
9361 fputs ("@TPOFF", file);
9364 output_addr_const (file, op);
9366 fputs ("@TPOFF", file);
9368 fputs ("@NTPOFF", file);
9371 output_addr_const (file, op);
9372 fputs ("@DTPOFF", file);
9374 case UNSPEC_GOTNTPOFF:
9375 output_addr_const (file, op);
9377 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9378 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9380 fputs ("@GOTNTPOFF", file);
9382 case UNSPEC_INDNTPOFF:
9383 output_addr_const (file, op);
9384 fputs ("@INDNTPOFF", file);
9394 /* Split one or more DImode RTL references into pairs of SImode
9395 references. The RTL can be REG, offsettable MEM, integer constant, or
9396 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9397 split and "num" is its length. lo_half and hi_half are output arrays
9398 that parallel "operands". */
9401 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9405 rtx op = operands[num];
9407 /* simplify_subreg refuse to split volatile memory addresses,
9408 but we still have to handle it. */
9411 lo_half[num] = adjust_address (op, SImode, 0);
9412 hi_half[num] = adjust_address (op, SImode, 4);
9416 lo_half[num] = simplify_gen_subreg (SImode, op,
9417 GET_MODE (op) == VOIDmode
9418 ? DImode : GET_MODE (op), 0);
9419 hi_half[num] = simplify_gen_subreg (SImode, op,
9420 GET_MODE (op) == VOIDmode
9421 ? DImode : GET_MODE (op), 4);
9425 /* Split one or more TImode RTL references into pairs of DImode
9426 references. The RTL can be REG, offsettable MEM, integer constant, or
9427 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9428 split and "num" is its length. lo_half and hi_half are output arrays
9429 that parallel "operands". */
9432 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9436 rtx op = operands[num];
9438 /* simplify_subreg refuse to split volatile memory addresses, but we
9439 still have to handle it. */
9442 lo_half[num] = adjust_address (op, DImode, 0);
9443 hi_half[num] = adjust_address (op, DImode, 8);
9447 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9448 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9453 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9454 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9455 is the expression of the binary operation. The output may either be
9456 emitted here, or returned to the caller, like all output_* functions.
9458 There is no guarantee that the operands are the same mode, as they
9459 might be within FLOAT or FLOAT_EXTEND expressions. */
9461 #ifndef SYSV386_COMPAT
9462 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9463 wants to fix the assemblers because that causes incompatibility
9464 with gcc. No-one wants to fix gcc because that causes
9465 incompatibility with assemblers... You can use the option of
9466 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9467 #define SYSV386_COMPAT 1
9471 output_387_binary_op (rtx insn, rtx *operands)
9473 static char buf[30];
9476 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9478 #ifdef ENABLE_CHECKING
9479 /* Even if we do not want to check the inputs, this documents input
9480 constraints. Which helps in understanding the following code. */
9481 if (STACK_REG_P (operands[0])
9482 && ((REG_P (operands[1])
9483 && REGNO (operands[0]) == REGNO (operands[1])
9484 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9485 || (REG_P (operands[2])
9486 && REGNO (operands[0]) == REGNO (operands[2])
9487 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9488 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9491 gcc_assert (is_sse);
9494 switch (GET_CODE (operands[3]))
9497 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9498 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9506 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9507 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9515 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9516 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9524 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9525 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9539 if (GET_MODE (operands[0]) == SFmode)
9540 strcat (buf, "ss\t{%2, %0|%0, %2}");
9542 strcat (buf, "sd\t{%2, %0|%0, %2}");
9547 switch (GET_CODE (operands[3]))
9551 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9553 rtx temp = operands[2];
9554 operands[2] = operands[1];
9558 /* know operands[0] == operands[1]. */
9560 if (MEM_P (operands[2]))
9566 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9568 if (STACK_TOP_P (operands[0]))
9569 /* How is it that we are storing to a dead operand[2]?
9570 Well, presumably operands[1] is dead too. We can't
9571 store the result to st(0) as st(0) gets popped on this
9572 instruction. Instead store to operands[2] (which I
9573 think has to be st(1)). st(1) will be popped later.
9574 gcc <= 2.8.1 didn't have this check and generated
9575 assembly code that the Unixware assembler rejected. */
9576 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9578 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9582 if (STACK_TOP_P (operands[0]))
9583 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9585 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9590 if (MEM_P (operands[1]))
9596 if (MEM_P (operands[2]))
9602 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9605 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9606 derived assemblers, confusingly reverse the direction of
9607 the operation for fsub{r} and fdiv{r} when the
9608 destination register is not st(0). The Intel assembler
9609 doesn't have this brain damage. Read !SYSV386_COMPAT to
9610 figure out what the hardware really does. */
9611 if (STACK_TOP_P (operands[0]))
9612 p = "{p\t%0, %2|rp\t%2, %0}";
9614 p = "{rp\t%2, %0|p\t%0, %2}";
9616 if (STACK_TOP_P (operands[0]))
9617 /* As above for fmul/fadd, we can't store to st(0). */
9618 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9620 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9625 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9628 if (STACK_TOP_P (operands[0]))
9629 p = "{rp\t%0, %1|p\t%1, %0}";
9631 p = "{p\t%1, %0|rp\t%0, %1}";
9633 if (STACK_TOP_P (operands[0]))
9634 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9636 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9641 if (STACK_TOP_P (operands[0]))
9643 if (STACK_TOP_P (operands[1]))
9644 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9646 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9649 else if (STACK_TOP_P (operands[1]))
9652 p = "{\t%1, %0|r\t%0, %1}";
9654 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9660 p = "{r\t%2, %0|\t%0, %2}";
9662 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9675 /* Return needed mode for entity in optimize_mode_switching pass. */
9678 ix86_mode_needed (int entity, rtx insn)
9680 enum attr_i387_cw mode;
9682 /* The mode UNINITIALIZED is used to store control word after a
9683 function call or ASM pattern. The mode ANY specify that function
9684 has no requirements on the control word and make no changes in the
9685 bits we are interested in. */
9688 || (NONJUMP_INSN_P (insn)
9689 && (asm_noperands (PATTERN (insn)) >= 0
9690 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9691 return I387_CW_UNINITIALIZED;
9693 if (recog_memoized (insn) < 0)
9696 mode = get_attr_i387_cw (insn);
9701 if (mode == I387_CW_TRUNC)
9706 if (mode == I387_CW_FLOOR)
9711 if (mode == I387_CW_CEIL)
9716 if (mode == I387_CW_MASK_PM)
9727 /* Output code to initialize control word copies used by trunc?f?i and
9728 rounding patterns. CURRENT_MODE is set to current control word,
9729 while NEW_MODE is set to new control word. */
9732 emit_i387_cw_initialization (int mode)
9734 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9737 enum ix86_stack_slot slot;
9739 rtx reg = gen_reg_rtx (HImode);
9741 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9742 emit_move_insn (reg, copy_rtx (stored_mode));
9744 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9749 /* round toward zero (truncate) */
9750 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9751 slot = SLOT_CW_TRUNC;
9755 /* round down toward -oo */
9756 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9757 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9758 slot = SLOT_CW_FLOOR;
9762 /* round up toward +oo */
9763 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9764 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9765 slot = SLOT_CW_CEIL;
9768 case I387_CW_MASK_PM:
9769 /* mask precision exception for nearbyint() */
9770 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9771 slot = SLOT_CW_MASK_PM;
9783 /* round toward zero (truncate) */
9784 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9785 slot = SLOT_CW_TRUNC;
9789 /* round down toward -oo */
9790 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9791 slot = SLOT_CW_FLOOR;
9795 /* round up toward +oo */
9796 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9797 slot = SLOT_CW_CEIL;
9800 case I387_CW_MASK_PM:
9801 /* mask precision exception for nearbyint() */
9802 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9803 slot = SLOT_CW_MASK_PM;
9811 gcc_assert (slot < MAX_386_STACK_LOCALS);
9813 new_mode = assign_386_stack_local (HImode, slot);
9814 emit_move_insn (new_mode, reg);
9817 /* Output code for INSN to convert a float to a signed int. OPERANDS
9818 are the insn operands. The output may be [HSD]Imode and the input
9819 operand may be [SDX]Fmode. */
9822 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9824 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9825 int dimode_p = GET_MODE (operands[0]) == DImode;
9826 int round_mode = get_attr_i387_cw (insn);
9828 /* Jump through a hoop or two for DImode, since the hardware has no
9829 non-popping instruction. We used to do this a different way, but
9830 that was somewhat fragile and broke with post-reload splitters. */
9831 if ((dimode_p || fisttp) && !stack_top_dies)
9832 output_asm_insn ("fld\t%y1", operands);
9834 gcc_assert (STACK_TOP_P (operands[1]));
9835 gcc_assert (MEM_P (operands[0]));
9836 gcc_assert (GET_MODE (operands[1]) != TFmode);
9839 output_asm_insn ("fisttp%z0\t%0", operands);
9842 if (round_mode != I387_CW_ANY)
9843 output_asm_insn ("fldcw\t%3", operands);
9844 if (stack_top_dies || dimode_p)
9845 output_asm_insn ("fistp%z0\t%0", operands);
9847 output_asm_insn ("fist%z0\t%0", operands);
9848 if (round_mode != I387_CW_ANY)
9849 output_asm_insn ("fldcw\t%2", operands);
9855 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9856 have the values zero or one, indicates the ffreep insn's operand
9857 from the OPERANDS array. */
9860 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9862 if (TARGET_USE_FFREEP)
9863 #if HAVE_AS_IX86_FFREEP
9864 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9867 static char retval[] = ".word\t0xc_df";
9868 int regno = REGNO (operands[opno]);
9870 gcc_assert (FP_REGNO_P (regno));
9872 retval[9] = '0' + (regno - FIRST_STACK_REG);
9877 return opno ? "fstp\t%y1" : "fstp\t%y0";
9881 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9882 should be used. UNORDERED_P is true when fucom should be used. */
9885 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9888 rtx cmp_op0, cmp_op1;
9889 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9893 cmp_op0 = operands[0];
9894 cmp_op1 = operands[1];
9898 cmp_op0 = operands[1];
9899 cmp_op1 = operands[2];
9904 if (GET_MODE (operands[0]) == SFmode)
9906 return "ucomiss\t{%1, %0|%0, %1}";
9908 return "comiss\t{%1, %0|%0, %1}";
9911 return "ucomisd\t{%1, %0|%0, %1}";
9913 return "comisd\t{%1, %0|%0, %1}";
9916 gcc_assert (STACK_TOP_P (cmp_op0));
9918 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9920 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9924 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9925 return output_387_ffreep (operands, 1);
9928 return "ftst\n\tfnstsw\t%0";
9931 if (STACK_REG_P (cmp_op1)
9933 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9934 && REGNO (cmp_op1) != FIRST_STACK_REG)
9936 /* If both the top of the 387 stack dies, and the other operand
9937 is also a stack register that dies, then this must be a
9938 `fcompp' float compare */
9942 /* There is no double popping fcomi variant. Fortunately,
9943 eflags is immune from the fstp's cc clobbering. */
9945 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9947 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9948 return output_387_ffreep (operands, 0);
9953 return "fucompp\n\tfnstsw\t%0";
9955 return "fcompp\n\tfnstsw\t%0";
9960 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9962 static const char * const alt[16] =
9964 "fcom%z2\t%y2\n\tfnstsw\t%0",
9965 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9966 "fucom%z2\t%y2\n\tfnstsw\t%0",
9967 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9969 "ficom%z2\t%y2\n\tfnstsw\t%0",
9970 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9974 "fcomi\t{%y1, %0|%0, %y1}",
9975 "fcomip\t{%y1, %0|%0, %y1}",
9976 "fucomi\t{%y1, %0|%0, %y1}",
9977 "fucomip\t{%y1, %0|%0, %y1}",
9988 mask = eflags_p << 3;
9989 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9990 mask |= unordered_p << 1;
9991 mask |= stack_top_dies;
9993 gcc_assert (mask < 16);
10002 ix86_output_addr_vec_elt (FILE *file, int value)
10004 const char *directive = ASM_LONG;
10008 directive = ASM_QUAD;
10010 gcc_assert (!TARGET_64BIT);
10013 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10017 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10019 const char *directive = ASM_LONG;
10022 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10023 directive = ASM_QUAD;
10025 gcc_assert (!TARGET_64BIT);
10027 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10028 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10029 fprintf (file, "%s%s%d-%s%d\n",
10030 directive, LPREFIX, value, LPREFIX, rel);
10031 else if (HAVE_AS_GOTOFF_IN_DATA)
10032 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10034 else if (TARGET_MACHO)
10036 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10037 machopic_output_function_base_name (file);
10038 fprintf(file, "\n");
10042 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10043 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10046 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10050 ix86_expand_clear (rtx dest)
10054 /* We play register width games, which are only valid after reload. */
10055 gcc_assert (reload_completed);
10057 /* Avoid HImode and its attendant prefix byte. */
10058 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10059 dest = gen_rtx_REG (SImode, REGNO (dest));
10060 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10062 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10063 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10065 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10066 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10072 /* X is an unchanging MEM. If it is a constant pool reference, return
10073 the constant pool rtx, else NULL. */
10076 maybe_get_pool_constant (rtx x)
10078 x = ix86_delegitimize_address (XEXP (x, 0));
10080 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10081 return get_pool_constant (x);
10087 ix86_expand_move (enum machine_mode mode, rtx operands[])
10090 enum tls_model model;
10095 if (GET_CODE (op1) == SYMBOL_REF)
10097 model = SYMBOL_REF_TLS_MODEL (op1);
10100 op1 = legitimize_tls_address (op1, model, true);
10101 op1 = force_operand (op1, op0);
10105 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10106 && SYMBOL_REF_DLLIMPORT_P (op1))
10107 op1 = legitimize_dllimport_symbol (op1, false);
10109 else if (GET_CODE (op1) == CONST
10110 && GET_CODE (XEXP (op1, 0)) == PLUS
10111 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10113 rtx addend = XEXP (XEXP (op1, 0), 1);
10114 rtx symbol = XEXP (XEXP (op1, 0), 0);
10117 model = SYMBOL_REF_TLS_MODEL (symbol);
10119 tmp = legitimize_tls_address (symbol, model, true);
10120 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10121 && SYMBOL_REF_DLLIMPORT_P (symbol))
10122 tmp = legitimize_dllimport_symbol (symbol, true);
10126 tmp = force_operand (tmp, NULL);
10127 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10128 op0, 1, OPTAB_DIRECT);
10134 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10136 if (TARGET_MACHO && !TARGET_64BIT)
10141 rtx temp = ((reload_in_progress
10142 || ((op0 && REG_P (op0))
10144 ? op0 : gen_reg_rtx (Pmode));
10145 op1 = machopic_indirect_data_reference (op1, temp);
10146 op1 = machopic_legitimize_pic_address (op1, mode,
10147 temp == op1 ? 0 : temp);
10149 else if (MACHOPIC_INDIRECT)
10150 op1 = machopic_indirect_data_reference (op1, 0);
10158 op1 = force_reg (Pmode, op1);
10159 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10161 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10162 op1 = legitimize_pic_address (op1, reg);
10171 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10172 || !push_operand (op0, mode))
10174 op1 = force_reg (mode, op1);
10176 if (push_operand (op0, mode)
10177 && ! general_no_elim_operand (op1, mode))
10178 op1 = copy_to_mode_reg (mode, op1);
10180 /* Force large constants in 64bit compilation into register
10181 to get them CSEed. */
10182 if (can_create_pseudo_p ()
10183 && (mode == DImode) && TARGET_64BIT
10184 && immediate_operand (op1, mode)
10185 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10186 && !register_operand (op0, mode)
10188 op1 = copy_to_mode_reg (mode, op1);
10190 if (can_create_pseudo_p ()
10191 && FLOAT_MODE_P (mode)
10192 && GET_CODE (op1) == CONST_DOUBLE)
10194 /* If we are loading a floating point constant to a register,
10195 force the value to memory now, since we'll get better code
10196 out the back end. */
10198 op1 = validize_mem (force_const_mem (mode, op1));
10199 if (!register_operand (op0, mode))
10201 rtx temp = gen_reg_rtx (mode);
10202 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10203 emit_move_insn (op0, temp);
10209 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10213 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10215 rtx op0 = operands[0], op1 = operands[1];
10216 unsigned int align = GET_MODE_ALIGNMENT (mode);
10218 /* Force constants other than zero into memory. We do not know how
10219 the instructions used to build constants modify the upper 64 bits
10220 of the register, once we have that information we may be able
10221 to handle some of them more efficiently. */
10222 if (can_create_pseudo_p ()
10223 && register_operand (op0, mode)
10224 && (CONSTANT_P (op1)
10225 || (GET_CODE (op1) == SUBREG
10226 && CONSTANT_P (SUBREG_REG (op1))))
10227 && standard_sse_constant_p (op1) <= 0)
10228 op1 = validize_mem (force_const_mem (mode, op1));
10230 /* TDmode values are passed as TImode on the stack. TImode values
10231 are moved via xmm registers, and moving them to stack can result in
10232 unaligned memory access. Use ix86_expand_vector_move_misalign()
10233 if memory operand is not aligned correctly. */
10234 if (can_create_pseudo_p ()
10235 && (mode == TImode) && !TARGET_64BIT
10236 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10237 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10241 /* ix86_expand_vector_move_misalign() does not like constants ... */
10242 if (CONSTANT_P (op1)
10243 || (GET_CODE (op1) == SUBREG
10244 && CONSTANT_P (SUBREG_REG (op1))))
10245 op1 = validize_mem (force_const_mem (mode, op1));
10247 /* ... nor both arguments in memory. */
10248 if (!register_operand (op0, mode)
10249 && !register_operand (op1, mode))
10250 op1 = force_reg (mode, op1);
10252 tmp[0] = op0; tmp[1] = op1;
10253 ix86_expand_vector_move_misalign (mode, tmp);
10257 /* Make operand1 a register if it isn't already. */
10258 if (can_create_pseudo_p ()
10259 && !register_operand (op0, mode)
10260 && !register_operand (op1, mode))
10262 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10266 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10269 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10270 straight to ix86_expand_vector_move. */
10271 /* Code generation for scalar reg-reg moves of single and double precision data:
10272 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10276 if (x86_sse_partial_reg_dependency == true)
10281 Code generation for scalar loads of double precision data:
10282 if (x86_sse_split_regs == true)
10283 movlpd mem, reg (gas syntax)
10287 Code generation for unaligned packed loads of single precision data
10288 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10289 if (x86_sse_unaligned_move_optimal)
10292 if (x86_sse_partial_reg_dependency == true)
10304 Code generation for unaligned packed loads of double precision data
10305 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10306 if (x86_sse_unaligned_move_optimal)
10309 if (x86_sse_split_regs == true)
10322 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10331 /* If we're optimizing for size, movups is the smallest. */
10334 op0 = gen_lowpart (V4SFmode, op0);
10335 op1 = gen_lowpart (V4SFmode, op1);
10336 emit_insn (gen_sse_movups (op0, op1));
10340 /* ??? If we have typed data, then it would appear that using
10341 movdqu is the only way to get unaligned data loaded with
10343 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10345 op0 = gen_lowpart (V16QImode, op0);
10346 op1 = gen_lowpart (V16QImode, op1);
10347 emit_insn (gen_sse2_movdqu (op0, op1));
10351 if (TARGET_SSE2 && mode == V2DFmode)
10355 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10357 op0 = gen_lowpart (V2DFmode, op0);
10358 op1 = gen_lowpart (V2DFmode, op1);
10359 emit_insn (gen_sse2_movupd (op0, op1));
10363 /* When SSE registers are split into halves, we can avoid
10364 writing to the top half twice. */
10365 if (TARGET_SSE_SPLIT_REGS)
10367 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10372 /* ??? Not sure about the best option for the Intel chips.
10373 The following would seem to satisfy; the register is
10374 entirely cleared, breaking the dependency chain. We
10375 then store to the upper half, with a dependency depth
10376 of one. A rumor has it that Intel recommends two movsd
10377 followed by an unpacklpd, but this is unconfirmed. And
10378 given that the dependency depth of the unpacklpd would
10379 still be one, I'm not sure why this would be better. */
10380 zero = CONST0_RTX (V2DFmode);
10383 m = adjust_address (op1, DFmode, 0);
10384 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10385 m = adjust_address (op1, DFmode, 8);
10386 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10390 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10392 op0 = gen_lowpart (V4SFmode, op0);
10393 op1 = gen_lowpart (V4SFmode, op1);
10394 emit_insn (gen_sse_movups (op0, op1));
10398 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10399 emit_move_insn (op0, CONST0_RTX (mode));
10401 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10403 if (mode != V4SFmode)
10404 op0 = gen_lowpart (V4SFmode, op0);
10405 m = adjust_address (op1, V2SFmode, 0);
10406 emit_insn (gen_sse_loadlps (op0, op0, m));
10407 m = adjust_address (op1, V2SFmode, 8);
10408 emit_insn (gen_sse_loadhps (op0, op0, m));
10411 else if (MEM_P (op0))
10413 /* If we're optimizing for size, movups is the smallest. */
10416 op0 = gen_lowpart (V4SFmode, op0);
10417 op1 = gen_lowpart (V4SFmode, op1);
10418 emit_insn (gen_sse_movups (op0, op1));
10422 /* ??? Similar to above, only less clear because of quote
10423 typeless stores unquote. */
10424 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10425 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10427 op0 = gen_lowpart (V16QImode, op0);
10428 op1 = gen_lowpart (V16QImode, op1);
10429 emit_insn (gen_sse2_movdqu (op0, op1));
10433 if (TARGET_SSE2 && mode == V2DFmode)
10435 m = adjust_address (op0, DFmode, 0);
10436 emit_insn (gen_sse2_storelpd (m, op1));
10437 m = adjust_address (op0, DFmode, 8);
10438 emit_insn (gen_sse2_storehpd (m, op1));
10442 if (mode != V4SFmode)
10443 op1 = gen_lowpart (V4SFmode, op1);
10444 m = adjust_address (op0, V2SFmode, 0);
10445 emit_insn (gen_sse_storelps (m, op1));
10446 m = adjust_address (op0, V2SFmode, 8);
10447 emit_insn (gen_sse_storehps (m, op1));
10451 gcc_unreachable ();
10454 /* Expand a push in MODE. This is some mode for which we do not support
10455 proper push instructions, at least from the registers that we expect
10456 the value to live in. */
10459 ix86_expand_push (enum machine_mode mode, rtx x)
10463 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10464 GEN_INT (-GET_MODE_SIZE (mode)),
10465 stack_pointer_rtx, 1, OPTAB_DIRECT);
10466 if (tmp != stack_pointer_rtx)
10467 emit_move_insn (stack_pointer_rtx, tmp);
10469 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10470 emit_move_insn (tmp, x);
10473 /* Helper function of ix86_fixup_binary_operands to canonicalize
10474 operand order. Returns true if the operands should be swapped. */
10477 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10480 rtx dst = operands[0];
10481 rtx src1 = operands[1];
10482 rtx src2 = operands[2];
10484 /* If the operation is not commutative, we can't do anything. */
10485 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10488 /* Highest priority is that src1 should match dst. */
10489 if (rtx_equal_p (dst, src1))
10491 if (rtx_equal_p (dst, src2))
10494 /* Next highest priority is that immediate constants come second. */
10495 if (immediate_operand (src2, mode))
10497 if (immediate_operand (src1, mode))
10500 /* Lowest priority is that memory references should come second. */
10510 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10511 destination to use for the operation. If different from the true
10512 destination in operands[0], a copy operation will be required. */
10515 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10518 rtx dst = operands[0];
10519 rtx src1 = operands[1];
10520 rtx src2 = operands[2];
10522 /* Canonicalize operand order. */
10523 if (ix86_swap_binary_operands_p (code, mode, operands))
10530 /* Both source operands cannot be in memory. */
10531 if (MEM_P (src1) && MEM_P (src2))
10533 /* Optimization: Only read from memory once. */
10534 if (rtx_equal_p (src1, src2))
10536 src2 = force_reg (mode, src2);
10540 src2 = force_reg (mode, src2);
10543 /* If the destination is memory, and we do not have matching source
10544 operands, do things in registers. */
10545 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10546 dst = gen_reg_rtx (mode);
10548 /* Source 1 cannot be a constant. */
10549 if (CONSTANT_P (src1))
10550 src1 = force_reg (mode, src1);
10552 /* Source 1 cannot be a non-matching memory. */
10553 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10554 src1 = force_reg (mode, src1);
10556 operands[1] = src1;
10557 operands[2] = src2;
10561 /* Similarly, but assume that the destination has already been
10562 set up properly. */
10565 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10566 enum machine_mode mode, rtx operands[])
10568 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10569 gcc_assert (dst == operands[0]);
10572 /* Attempt to expand a binary operator. Make the expansion closer to the
10573 actual machine, then just general_operand, which will allow 3 separate
10574 memory references (one output, two input) in a single insn. */
10577 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10580 rtx src1, src2, dst, op, clob;
10582 dst = ix86_fixup_binary_operands (code, mode, operands);
10583 src1 = operands[1];
10584 src2 = operands[2];
10586 /* Emit the instruction. */
10588 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10589 if (reload_in_progress)
10591 /* Reload doesn't know about the flags register, and doesn't know that
10592 it doesn't want to clobber it. We can only do this with PLUS. */
10593 gcc_assert (code == PLUS);
10598 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10599 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10602 /* Fix up the destination if needed. */
10603 if (dst != operands[0])
10604 emit_move_insn (operands[0], dst);
10607 /* Return TRUE or FALSE depending on whether the binary operator meets the
10608 appropriate constraints. */
10611 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10614 rtx dst = operands[0];
10615 rtx src1 = operands[1];
10616 rtx src2 = operands[2];
10618 /* Both source operands cannot be in memory. */
10619 if (MEM_P (src1) && MEM_P (src2))
10622 /* Canonicalize operand order for commutative operators. */
10623 if (ix86_swap_binary_operands_p (code, mode, operands))
10630 /* If the destination is memory, we must have a matching source operand. */
10631 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10634 /* Source 1 cannot be a constant. */
10635 if (CONSTANT_P (src1))
10638 /* Source 1 cannot be a non-matching memory. */
10639 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10645 /* Attempt to expand a unary operator. Make the expansion closer to the
10646 actual machine, then just general_operand, which will allow 2 separate
10647 memory references (one output, one input) in a single insn. */
10650 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10653 int matching_memory;
10654 rtx src, dst, op, clob;
10659 /* If the destination is memory, and we do not have matching source
10660 operands, do things in registers. */
10661 matching_memory = 0;
10664 if (rtx_equal_p (dst, src))
10665 matching_memory = 1;
10667 dst = gen_reg_rtx (mode);
10670 /* When source operand is memory, destination must match. */
10671 if (MEM_P (src) && !matching_memory)
10672 src = force_reg (mode, src);
10674 /* Emit the instruction. */
10676 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10677 if (reload_in_progress || code == NOT)
10679 /* Reload doesn't know about the flags register, and doesn't know that
10680 it doesn't want to clobber it. */
10681 gcc_assert (code == NOT);
10686 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10687 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10690 /* Fix up the destination if needed. */
10691 if (dst != operands[0])
10692 emit_move_insn (operands[0], dst);
10695 /* Return TRUE or FALSE depending on whether the unary operator meets the
10696 appropriate constraints. */
10699 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10700 enum machine_mode mode ATTRIBUTE_UNUSED,
10701 rtx operands[2] ATTRIBUTE_UNUSED)
10703 /* If one of operands is memory, source and destination must match. */
10704 if ((MEM_P (operands[0])
10705 || MEM_P (operands[1]))
10706 && ! rtx_equal_p (operands[0], operands[1]))
10711 /* Post-reload splitter for converting an SF or DFmode value in an
10712 SSE register into an unsigned SImode. */
10715 ix86_split_convert_uns_si_sse (rtx operands[])
10717 enum machine_mode vecmode;
10718 rtx value, large, zero_or_two31, input, two31, x;
10720 large = operands[1];
10721 zero_or_two31 = operands[2];
10722 input = operands[3];
10723 two31 = operands[4];
10724 vecmode = GET_MODE (large);
10725 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10727 /* Load up the value into the low element. We must ensure that the other
10728 elements are valid floats -- zero is the easiest such value. */
10731 if (vecmode == V4SFmode)
10732 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10734 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10738 input = gen_rtx_REG (vecmode, REGNO (input));
10739 emit_move_insn (value, CONST0_RTX (vecmode));
10740 if (vecmode == V4SFmode)
10741 emit_insn (gen_sse_movss (value, value, input));
10743 emit_insn (gen_sse2_movsd (value, value, input));
10746 emit_move_insn (large, two31);
10747 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10749 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10750 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10752 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10753 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10755 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10756 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10758 large = gen_rtx_REG (V4SImode, REGNO (large));
10759 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10761 x = gen_rtx_REG (V4SImode, REGNO (value));
10762 if (vecmode == V4SFmode)
10763 emit_insn (gen_sse2_cvttps2dq (x, value));
10765 emit_insn (gen_sse2_cvttpd2dq (x, value));
10768 emit_insn (gen_xorv4si3 (value, value, large));
10771 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10772 Expects the 64-bit DImode to be supplied in a pair of integral
10773 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10774 -mfpmath=sse, !optimize_size only. */
10777 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10779 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10780 rtx int_xmm, fp_xmm;
10781 rtx biases, exponents;
10784 int_xmm = gen_reg_rtx (V4SImode);
10785 if (TARGET_INTER_UNIT_MOVES)
10786 emit_insn (gen_movdi_to_sse (int_xmm, input));
10787 else if (TARGET_SSE_SPLIT_REGS)
10789 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10790 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10794 x = gen_reg_rtx (V2DImode);
10795 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10796 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10799 x = gen_rtx_CONST_VECTOR (V4SImode,
10800 gen_rtvec (4, GEN_INT (0x43300000UL),
10801 GEN_INT (0x45300000UL),
10802 const0_rtx, const0_rtx));
10803 exponents = validize_mem (force_const_mem (V4SImode, x));
10805 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10806 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10808 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10809 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10810 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10811 (0x1.0p84 + double(fp_value_hi_xmm)).
10812 Note these exponents differ by 32. */
10814 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10816 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10817 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10818 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10819 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10820 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10821 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10822 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10823 biases = validize_mem (force_const_mem (V2DFmode, biases));
10824 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10826 /* Add the upper and lower DFmode values together. */
10828 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10831 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10832 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10833 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10836 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10839 /* Convert an unsigned SImode value into a DFmode. Only currently used
10840 for SSE, but applicable anywhere. */
10843 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10845 REAL_VALUE_TYPE TWO31r;
10848 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10849 NULL, 1, OPTAB_DIRECT);
10851 fp = gen_reg_rtx (DFmode);
10852 emit_insn (gen_floatsidf2 (fp, x));
10854 real_ldexp (&TWO31r, &dconst1, 31);
10855 x = const_double_from_real_value (TWO31r, DFmode);
10857 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10859 emit_move_insn (target, x);
10862 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10863 32-bit mode; otherwise we have a direct convert instruction. */
10866 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10868 REAL_VALUE_TYPE TWO32r;
10869 rtx fp_lo, fp_hi, x;
10871 fp_lo = gen_reg_rtx (DFmode);
10872 fp_hi = gen_reg_rtx (DFmode);
10874 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10876 real_ldexp (&TWO32r, &dconst1, 32);
10877 x = const_double_from_real_value (TWO32r, DFmode);
10878 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10880 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10882 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10885 emit_move_insn (target, x);
10888 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10889 For x86_32, -mfpmath=sse, !optimize_size only. */
10891 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10893 REAL_VALUE_TYPE ONE16r;
10894 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10896 real_ldexp (&ONE16r, &dconst1, 16);
10897 x = const_double_from_real_value (ONE16r, SFmode);
10898 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10899 NULL, 0, OPTAB_DIRECT);
10900 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10901 NULL, 0, OPTAB_DIRECT);
10902 fp_hi = gen_reg_rtx (SFmode);
10903 fp_lo = gen_reg_rtx (SFmode);
10904 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10905 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10906 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10908 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10910 if (!rtx_equal_p (target, fp_hi))
10911 emit_move_insn (target, fp_hi);
10914 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10915 then replicate the value for all elements of the vector
10919 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10926 v = gen_rtvec (4, value, value, value, value);
10927 return gen_rtx_CONST_VECTOR (V4SImode, v);
10931 v = gen_rtvec (2, value, value);
10932 return gen_rtx_CONST_VECTOR (V2DImode, v);
10936 v = gen_rtvec (4, value, value, value, value);
10938 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10939 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10940 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10944 v = gen_rtvec (2, value, value);
10946 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10947 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10950 gcc_unreachable ();
10954 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10955 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10956 for an SSE register. If VECT is true, then replicate the mask for
10957 all elements of the vector register. If INVERT is true, then create
10958 a mask excluding the sign bit. */
10961 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10963 enum machine_mode vec_mode, imode;
10964 HOST_WIDE_INT hi, lo;
10969 /* Find the sign bit, sign extended to 2*HWI. */
10975 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10976 lo = 0x80000000, hi = lo < 0;
10982 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10983 if (HOST_BITS_PER_WIDE_INT >= 64)
10984 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10986 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10992 vec_mode = VOIDmode;
10993 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10994 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10998 gcc_unreachable ();
11002 lo = ~lo, hi = ~hi;
11004 /* Force this value into the low part of a fp vector constant. */
11005 mask = immed_double_const (lo, hi, imode);
11006 mask = gen_lowpart (mode, mask);
11008 if (vec_mode == VOIDmode)
11009 return force_reg (mode, mask);
11011 v = ix86_build_const_vector (mode, vect, mask);
11012 return force_reg (vec_mode, v);
11015 /* Generate code for floating point ABS or NEG. */
11018 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11021 rtx mask, set, use, clob, dst, src;
11022 bool matching_memory;
11023 bool use_sse = false;
11024 bool vector_mode = VECTOR_MODE_P (mode);
11025 enum machine_mode elt_mode = mode;
11029 elt_mode = GET_MODE_INNER (mode);
11032 else if (mode == TFmode)
11034 else if (TARGET_SSE_MATH)
11035 use_sse = SSE_FLOAT_MODE_P (mode);
11037 /* NEG and ABS performed with SSE use bitwise mask operations.
11038 Create the appropriate mask now. */
11040 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11047 /* If the destination is memory, and we don't have matching source
11048 operands or we're using the x87, do things in registers. */
11049 matching_memory = false;
11052 if (use_sse && rtx_equal_p (dst, src))
11053 matching_memory = true;
11055 dst = gen_reg_rtx (mode);
11057 if (MEM_P (src) && !matching_memory)
11058 src = force_reg (mode, src);
11062 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11063 set = gen_rtx_SET (VOIDmode, dst, set);
11068 set = gen_rtx_fmt_e (code, mode, src);
11069 set = gen_rtx_SET (VOIDmode, dst, set);
11072 use = gen_rtx_USE (VOIDmode, mask);
11073 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11074 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11075 gen_rtvec (3, set, use, clob)));
11081 if (dst != operands[0])
11082 emit_move_insn (operands[0], dst);
11085 /* Expand a copysign operation. Special case operand 0 being a constant. */
11088 ix86_expand_copysign (rtx operands[])
11090 enum machine_mode mode, vmode;
11091 rtx dest, op0, op1, mask, nmask;
11093 dest = operands[0];
11097 mode = GET_MODE (dest);
11098 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11100 if (GET_CODE (op0) == CONST_DOUBLE)
11102 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11104 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11105 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11107 if (mode == SFmode || mode == DFmode)
11109 if (op0 == CONST0_RTX (mode))
11110 op0 = CONST0_RTX (vmode);
11115 if (mode == SFmode)
11116 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11117 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11119 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11120 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11124 mask = ix86_build_signbit_mask (mode, 0, 0);
11126 if (mode == SFmode)
11127 copysign_insn = gen_copysignsf3_const;
11128 else if (mode == DFmode)
11129 copysign_insn = gen_copysigndf3_const;
11131 copysign_insn = gen_copysigntf3_const;
11133 emit_insn (copysign_insn (dest, op0, op1, mask));
11137 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11139 nmask = ix86_build_signbit_mask (mode, 0, 1);
11140 mask = ix86_build_signbit_mask (mode, 0, 0);
11142 if (mode == SFmode)
11143 copysign_insn = gen_copysignsf3_var;
11144 else if (mode == DFmode)
11145 copysign_insn = gen_copysigndf3_var;
11147 copysign_insn = gen_copysigntf3_var;
11149 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11153 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11154 be a constant, and so has already been expanded into a vector constant. */
11157 ix86_split_copysign_const (rtx operands[])
11159 enum machine_mode mode, vmode;
11160 rtx dest, op0, op1, mask, x;
11162 dest = operands[0];
11165 mask = operands[3];
11167 mode = GET_MODE (dest);
11168 vmode = GET_MODE (mask);
11170 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11171 x = gen_rtx_AND (vmode, dest, mask);
11172 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11174 if (op0 != CONST0_RTX (vmode))
11176 x = gen_rtx_IOR (vmode, dest, op0);
11177 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11181 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11182 so we have to do two masks. */
11185 ix86_split_copysign_var (rtx operands[])
11187 enum machine_mode mode, vmode;
11188 rtx dest, scratch, op0, op1, mask, nmask, x;
11190 dest = operands[0];
11191 scratch = operands[1];
11194 nmask = operands[4];
11195 mask = operands[5];
11197 mode = GET_MODE (dest);
11198 vmode = GET_MODE (mask);
11200 if (rtx_equal_p (op0, op1))
11202 /* Shouldn't happen often (it's useless, obviously), but when it does
11203 we'd generate incorrect code if we continue below. */
11204 emit_move_insn (dest, op0);
11208 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11210 gcc_assert (REGNO (op1) == REGNO (scratch));
11212 x = gen_rtx_AND (vmode, scratch, mask);
11213 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11216 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11217 x = gen_rtx_NOT (vmode, dest);
11218 x = gen_rtx_AND (vmode, x, op0);
11219 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11223 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11225 x = gen_rtx_AND (vmode, scratch, mask);
11227 else /* alternative 2,4 */
11229 gcc_assert (REGNO (mask) == REGNO (scratch));
11230 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11231 x = gen_rtx_AND (vmode, scratch, op1);
11233 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11235 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11237 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11238 x = gen_rtx_AND (vmode, dest, nmask);
11240 else /* alternative 3,4 */
11242 gcc_assert (REGNO (nmask) == REGNO (dest));
11244 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11245 x = gen_rtx_AND (vmode, dest, op0);
11247 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11250 x = gen_rtx_IOR (vmode, dest, scratch);
11251 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11254 /* Return TRUE or FALSE depending on whether the first SET in INSN
11255 has source and destination with matching CC modes, and that the
11256 CC mode is at least as constrained as REQ_MODE. */
11259 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11262 enum machine_mode set_mode;
11264 set = PATTERN (insn);
11265 if (GET_CODE (set) == PARALLEL)
11266 set = XVECEXP (set, 0, 0);
11267 gcc_assert (GET_CODE (set) == SET);
11268 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11270 set_mode = GET_MODE (SET_DEST (set));
11274 if (req_mode != CCNOmode
11275 && (req_mode != CCmode
11276 || XEXP (SET_SRC (set), 1) != const0_rtx))
11280 if (req_mode == CCGCmode)
11284 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11288 if (req_mode == CCZmode)
11295 gcc_unreachable ();
11298 return (GET_MODE (SET_SRC (set)) == set_mode);
11301 /* Generate insn patterns to do an integer compare of OPERANDS. */
11304 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11306 enum machine_mode cmpmode;
11309 cmpmode = SELECT_CC_MODE (code, op0, op1);
11310 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11312 /* This is very simple, but making the interface the same as in the
11313 FP case makes the rest of the code easier. */
11314 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11315 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11317 /* Return the test that should be put into the flags user, i.e.
11318 the bcc, scc, or cmov instruction. */
11319 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11322 /* Figure out whether to use ordered or unordered fp comparisons.
11323 Return the appropriate mode to use. */
11326 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11328 /* ??? In order to make all comparisons reversible, we do all comparisons
11329 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11330 all forms trapping and nontrapping comparisons, we can make inequality
11331 comparisons trapping again, since it results in better code when using
11332 FCOM based compares. */
11333 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11337 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11339 enum machine_mode mode = GET_MODE (op0);
11341 if (SCALAR_FLOAT_MODE_P (mode))
11343 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11344 return ix86_fp_compare_mode (code);
11349 /* Only zero flag is needed. */
11350 case EQ: /* ZF=0 */
11351 case NE: /* ZF!=0 */
11353 /* Codes needing carry flag. */
11354 case GEU: /* CF=0 */
11355 case LTU: /* CF=1 */
11356 /* Detect overflow checks. They need just the carry flag. */
11357 if (GET_CODE (op0) == PLUS
11358 && rtx_equal_p (op1, XEXP (op0, 0)))
11362 case GTU: /* CF=0 & ZF=0 */
11363 case LEU: /* CF=1 | ZF=1 */
11364 /* Detect overflow checks. They need just the carry flag. */
11365 if (GET_CODE (op0) == MINUS
11366 && rtx_equal_p (op1, XEXP (op0, 0)))
11370 /* Codes possibly doable only with sign flag when
11371 comparing against zero. */
11372 case GE: /* SF=OF or SF=0 */
11373 case LT: /* SF<>OF or SF=1 */
11374 if (op1 == const0_rtx)
11377 /* For other cases Carry flag is not required. */
11379 /* Codes doable only with sign flag when comparing
11380 against zero, but we miss jump instruction for it
11381 so we need to use relational tests against overflow
11382 that thus needs to be zero. */
11383 case GT: /* ZF=0 & SF=OF */
11384 case LE: /* ZF=1 | SF<>OF */
11385 if (op1 == const0_rtx)
11389 /* strcmp pattern do (use flags) and combine may ask us for proper
11394 gcc_unreachable ();
11398 /* Return the fixed registers used for condition codes. */
11401 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11408 /* If two condition code modes are compatible, return a condition code
11409 mode which is compatible with both. Otherwise, return
11412 static enum machine_mode
11413 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11418 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11421 if ((m1 == CCGCmode && m2 == CCGOCmode)
11422 || (m1 == CCGOCmode && m2 == CCGCmode))
11428 gcc_unreachable ();
11458 /* These are only compatible with themselves, which we already
11464 /* Split comparison code CODE into comparisons we can do using branch
11465 instructions. BYPASS_CODE is comparison code for branch that will
11466 branch around FIRST_CODE and SECOND_CODE. If some of branches
11467 is not required, set value to UNKNOWN.
11468 We never require more than two branches. */
11471 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11472 enum rtx_code *first_code,
11473 enum rtx_code *second_code)
11475 *first_code = code;
11476 *bypass_code = UNKNOWN;
11477 *second_code = UNKNOWN;
11479 /* The fcomi comparison sets flags as follows:
11489 case GT: /* GTU - CF=0 & ZF=0 */
11490 case GE: /* GEU - CF=0 */
11491 case ORDERED: /* PF=0 */
11492 case UNORDERED: /* PF=1 */
11493 case UNEQ: /* EQ - ZF=1 */
11494 case UNLT: /* LTU - CF=1 */
11495 case UNLE: /* LEU - CF=1 | ZF=1 */
11496 case LTGT: /* EQ - ZF=0 */
11498 case LT: /* LTU - CF=1 - fails on unordered */
11499 *first_code = UNLT;
11500 *bypass_code = UNORDERED;
11502 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11503 *first_code = UNLE;
11504 *bypass_code = UNORDERED;
11506 case EQ: /* EQ - ZF=1 - fails on unordered */
11507 *first_code = UNEQ;
11508 *bypass_code = UNORDERED;
11510 case NE: /* NE - ZF=0 - fails on unordered */
11511 *first_code = LTGT;
11512 *second_code = UNORDERED;
11514 case UNGE: /* GEU - CF=0 - fails on unordered */
11516 *second_code = UNORDERED;
11518 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11520 *second_code = UNORDERED;
11523 gcc_unreachable ();
11525 if (!TARGET_IEEE_FP)
11527 *second_code = UNKNOWN;
11528 *bypass_code = UNKNOWN;
11532 /* Return cost of comparison done fcom + arithmetics operations on AX.
11533 All following functions do use number of instructions as a cost metrics.
11534 In future this should be tweaked to compute bytes for optimize_size and
11535 take into account performance of various instructions on various CPUs. */
11537 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11539 if (!TARGET_IEEE_FP)
11541 /* The cost of code output by ix86_expand_fp_compare. */
11565 gcc_unreachable ();
11569 /* Return cost of comparison done using fcomi operation.
11570 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11572 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11574 enum rtx_code bypass_code, first_code, second_code;
11575 /* Return arbitrarily high cost when instruction is not supported - this
11576 prevents gcc from using it. */
11579 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11580 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11583 /* Return cost of comparison done using sahf operation.
11584 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11586 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11588 enum rtx_code bypass_code, first_code, second_code;
11589 /* Return arbitrarily high cost when instruction is not preferred - this
11590 avoids gcc from using it. */
11591 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11593 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11594 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11597 /* Compute cost of the comparison done using any method.
11598 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11600 ix86_fp_comparison_cost (enum rtx_code code)
11602 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11605 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11606 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11608 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11609 if (min > sahf_cost)
11611 if (min > fcomi_cost)
11616 /* Return true if we should use an FCOMI instruction for this
11620 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11622 enum rtx_code swapped_code = swap_condition (code);
11624 return ((ix86_fp_comparison_cost (code)
11625 == ix86_fp_comparison_fcomi_cost (code))
11626 || (ix86_fp_comparison_cost (swapped_code)
11627 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11630 /* Swap, force into registers, or otherwise massage the two operands
11631 to a fp comparison. The operands are updated in place; the new
11632 comparison code is returned. */
11634 static enum rtx_code
11635 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11637 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11638 rtx op0 = *pop0, op1 = *pop1;
11639 enum machine_mode op_mode = GET_MODE (op0);
11640 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11642 /* All of the unordered compare instructions only work on registers.
11643 The same is true of the fcomi compare instructions. The XFmode
11644 compare instructions require registers except when comparing
11645 against zero or when converting operand 1 from fixed point to
11649 && (fpcmp_mode == CCFPUmode
11650 || (op_mode == XFmode
11651 && ! (standard_80387_constant_p (op0) == 1
11652 || standard_80387_constant_p (op1) == 1)
11653 && GET_CODE (op1) != FLOAT)
11654 || ix86_use_fcomi_compare (code)))
11656 op0 = force_reg (op_mode, op0);
11657 op1 = force_reg (op_mode, op1);
11661 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11662 things around if they appear profitable, otherwise force op0
11663 into a register. */
11665 if (standard_80387_constant_p (op0) == 0
11667 && ! (standard_80387_constant_p (op1) == 0
11671 tmp = op0, op0 = op1, op1 = tmp;
11672 code = swap_condition (code);
11676 op0 = force_reg (op_mode, op0);
11678 if (CONSTANT_P (op1))
11680 int tmp = standard_80387_constant_p (op1);
11682 op1 = validize_mem (force_const_mem (op_mode, op1));
11686 op1 = force_reg (op_mode, op1);
11689 op1 = force_reg (op_mode, op1);
11693 /* Try to rearrange the comparison to make it cheaper. */
11694 if (ix86_fp_comparison_cost (code)
11695 > ix86_fp_comparison_cost (swap_condition (code))
11696 && (REG_P (op1) || can_create_pseudo_p ()))
11699 tmp = op0, op0 = op1, op1 = tmp;
11700 code = swap_condition (code);
11702 op0 = force_reg (op_mode, op0);
11710 /* Convert comparison codes we use to represent FP comparison to integer
11711 code that will result in proper branch. Return UNKNOWN if no such code
11715 ix86_fp_compare_code_to_integer (enum rtx_code code)
11744 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11747 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11748 rtx *second_test, rtx *bypass_test)
11750 enum machine_mode fpcmp_mode, intcmp_mode;
11752 int cost = ix86_fp_comparison_cost (code);
11753 enum rtx_code bypass_code, first_code, second_code;
11755 fpcmp_mode = ix86_fp_compare_mode (code);
11756 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11759 *second_test = NULL_RTX;
11761 *bypass_test = NULL_RTX;
11763 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11765 /* Do fcomi/sahf based test when profitable. */
11766 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11767 && (bypass_code == UNKNOWN || bypass_test)
11768 && (second_code == UNKNOWN || second_test))
11770 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11771 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11777 gcc_assert (TARGET_SAHF);
11780 scratch = gen_reg_rtx (HImode);
11781 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11783 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11786 /* The FP codes work out to act like unsigned. */
11787 intcmp_mode = fpcmp_mode;
11789 if (bypass_code != UNKNOWN)
11790 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11791 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11793 if (second_code != UNKNOWN)
11794 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11795 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11800 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11801 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11802 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11804 scratch = gen_reg_rtx (HImode);
11805 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11807 /* In the unordered case, we have to check C2 for NaN's, which
11808 doesn't happen to work out to anything nice combination-wise.
11809 So do some bit twiddling on the value we've got in AH to come
11810 up with an appropriate set of condition codes. */
11812 intcmp_mode = CCNOmode;
11817 if (code == GT || !TARGET_IEEE_FP)
11819 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11824 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11825 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11826 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11827 intcmp_mode = CCmode;
11833 if (code == LT && TARGET_IEEE_FP)
11835 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11836 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11837 intcmp_mode = CCmode;
11842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11848 if (code == GE || !TARGET_IEEE_FP)
11850 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11855 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11856 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11863 if (code == LE && TARGET_IEEE_FP)
11865 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11866 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11867 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11868 intcmp_mode = CCmode;
11873 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11879 if (code == EQ && TARGET_IEEE_FP)
11881 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11882 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11883 intcmp_mode = CCmode;
11888 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11895 if (code == NE && TARGET_IEEE_FP)
11897 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11898 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11904 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11910 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11914 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11919 gcc_unreachable ();
11923 /* Return the test that should be put into the flags user, i.e.
11924 the bcc, scc, or cmov instruction. */
11925 return gen_rtx_fmt_ee (code, VOIDmode,
11926 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11931 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11934 op0 = ix86_compare_op0;
11935 op1 = ix86_compare_op1;
11938 *second_test = NULL_RTX;
11940 *bypass_test = NULL_RTX;
11942 if (ix86_compare_emitted)
11944 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11945 ix86_compare_emitted = NULL_RTX;
11947 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11949 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11950 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11951 second_test, bypass_test);
11954 ret = ix86_expand_int_compare (code, op0, op1);
11959 /* Return true if the CODE will result in nontrivial jump sequence. */
11961 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11963 enum rtx_code bypass_code, first_code, second_code;
11966 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11967 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11971 ix86_expand_branch (enum rtx_code code, rtx label)
11975 /* If we have emitted a compare insn, go straight to simple.
11976 ix86_expand_compare won't emit anything if ix86_compare_emitted
11978 if (ix86_compare_emitted)
11981 switch (GET_MODE (ix86_compare_op0))
11987 tmp = ix86_expand_compare (code, NULL, NULL);
11988 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11989 gen_rtx_LABEL_REF (VOIDmode, label),
11991 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12000 enum rtx_code bypass_code, first_code, second_code;
12002 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12003 &ix86_compare_op1);
12005 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12007 /* Check whether we will use the natural sequence with one jump. If
12008 so, we can expand jump early. Otherwise delay expansion by
12009 creating compound insn to not confuse optimizers. */
12010 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12012 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12013 gen_rtx_LABEL_REF (VOIDmode, label),
12014 pc_rtx, NULL_RTX, NULL_RTX);
12018 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12019 ix86_compare_op0, ix86_compare_op1);
12020 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12021 gen_rtx_LABEL_REF (VOIDmode, label),
12023 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12025 use_fcomi = ix86_use_fcomi_compare (code);
12026 vec = rtvec_alloc (3 + !use_fcomi);
12027 RTVEC_ELT (vec, 0) = tmp;
12029 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12031 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12034 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12036 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12045 /* Expand DImode branch into multiple compare+branch. */
12047 rtx lo[2], hi[2], label2;
12048 enum rtx_code code1, code2, code3;
12049 enum machine_mode submode;
12051 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12053 tmp = ix86_compare_op0;
12054 ix86_compare_op0 = ix86_compare_op1;
12055 ix86_compare_op1 = tmp;
12056 code = swap_condition (code);
12058 if (GET_MODE (ix86_compare_op0) == DImode)
12060 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12061 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12066 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12067 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12071 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12072 avoid two branches. This costs one extra insn, so disable when
12073 optimizing for size. */
12075 if ((code == EQ || code == NE)
12077 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12082 if (hi[1] != const0_rtx)
12083 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12084 NULL_RTX, 0, OPTAB_WIDEN);
12087 if (lo[1] != const0_rtx)
12088 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12089 NULL_RTX, 0, OPTAB_WIDEN);
12091 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12092 NULL_RTX, 0, OPTAB_WIDEN);
12094 ix86_compare_op0 = tmp;
12095 ix86_compare_op1 = const0_rtx;
12096 ix86_expand_branch (code, label);
12100 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12101 op1 is a constant and the low word is zero, then we can just
12102 examine the high word. Similarly for low word -1 and
12103 less-or-equal-than or greater-than. */
12105 if (CONST_INT_P (hi[1]))
12108 case LT: case LTU: case GE: case GEU:
12109 if (lo[1] == const0_rtx)
12111 ix86_compare_op0 = hi[0];
12112 ix86_compare_op1 = hi[1];
12113 ix86_expand_branch (code, label);
12116 case LE: case LEU: case GT: case GTU:
12117 if (lo[1] == constm1_rtx)
12119 ix86_compare_op0 = hi[0];
12120 ix86_compare_op1 = hi[1];
12121 ix86_expand_branch (code, label);
12128 /* Otherwise, we need two or three jumps. */
12130 label2 = gen_label_rtx ();
12133 code2 = swap_condition (code);
12134 code3 = unsigned_condition (code);
12138 case LT: case GT: case LTU: case GTU:
12141 case LE: code1 = LT; code2 = GT; break;
12142 case GE: code1 = GT; code2 = LT; break;
12143 case LEU: code1 = LTU; code2 = GTU; break;
12144 case GEU: code1 = GTU; code2 = LTU; break;
12146 case EQ: code1 = UNKNOWN; code2 = NE; break;
12147 case NE: code2 = UNKNOWN; break;
12150 gcc_unreachable ();
12155 * if (hi(a) < hi(b)) goto true;
12156 * if (hi(a) > hi(b)) goto false;
12157 * if (lo(a) < lo(b)) goto true;
12161 ix86_compare_op0 = hi[0];
12162 ix86_compare_op1 = hi[1];
12164 if (code1 != UNKNOWN)
12165 ix86_expand_branch (code1, label);
12166 if (code2 != UNKNOWN)
12167 ix86_expand_branch (code2, label2);
12169 ix86_compare_op0 = lo[0];
12170 ix86_compare_op1 = lo[1];
12171 ix86_expand_branch (code3, label);
12173 if (code2 != UNKNOWN)
12174 emit_label (label2);
12179 gcc_unreachable ();
12183 /* Split branch based on floating point condition. */
12185 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12186 rtx target1, rtx target2, rtx tmp, rtx pushed)
12188 rtx second, bypass;
12189 rtx label = NULL_RTX;
12191 int bypass_probability = -1, second_probability = -1, probability = -1;
12194 if (target2 != pc_rtx)
12197 code = reverse_condition_maybe_unordered (code);
12202 condition = ix86_expand_fp_compare (code, op1, op2,
12203 tmp, &second, &bypass);
12205 /* Remove pushed operand from stack. */
12207 ix86_free_from_memory (GET_MODE (pushed));
12209 if (split_branch_probability >= 0)
12211 /* Distribute the probabilities across the jumps.
12212 Assume the BYPASS and SECOND to be always test
12214 probability = split_branch_probability;
12216 /* Value of 1 is low enough to make no need for probability
12217 to be updated. Later we may run some experiments and see
12218 if unordered values are more frequent in practice. */
12220 bypass_probability = 1;
12222 second_probability = 1;
12224 if (bypass != NULL_RTX)
12226 label = gen_label_rtx ();
12227 i = emit_jump_insn (gen_rtx_SET
12229 gen_rtx_IF_THEN_ELSE (VOIDmode,
12231 gen_rtx_LABEL_REF (VOIDmode,
12234 if (bypass_probability >= 0)
12236 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12237 GEN_INT (bypass_probability),
12240 i = emit_jump_insn (gen_rtx_SET
12242 gen_rtx_IF_THEN_ELSE (VOIDmode,
12243 condition, target1, target2)));
12244 if (probability >= 0)
12246 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12247 GEN_INT (probability),
12249 if (second != NULL_RTX)
12251 i = emit_jump_insn (gen_rtx_SET
12253 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12255 if (second_probability >= 0)
12257 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12258 GEN_INT (second_probability),
12261 if (label != NULL_RTX)
12262 emit_label (label);
12266 ix86_expand_setcc (enum rtx_code code, rtx dest)
12268 rtx ret, tmp, tmpreg, equiv;
12269 rtx second_test, bypass_test;
12271 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12272 return 0; /* FAIL */
12274 gcc_assert (GET_MODE (dest) == QImode);
12276 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12277 PUT_MODE (ret, QImode);
12282 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12283 if (bypass_test || second_test)
12285 rtx test = second_test;
12287 rtx tmp2 = gen_reg_rtx (QImode);
12290 gcc_assert (!second_test);
12291 test = bypass_test;
12293 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12295 PUT_MODE (test, QImode);
12296 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12299 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12301 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12304 /* Attach a REG_EQUAL note describing the comparison result. */
12305 if (ix86_compare_op0 && ix86_compare_op1)
12307 equiv = simplify_gen_relational (code, QImode,
12308 GET_MODE (ix86_compare_op0),
12309 ix86_compare_op0, ix86_compare_op1);
12310 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12313 return 1; /* DONE */
12316 /* Expand comparison setting or clearing carry flag. Return true when
12317 successful and set pop for the operation. */
12319 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12321 enum machine_mode mode =
12322 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12324 /* Do not handle DImode compares that go through special path. */
12325 if (mode == (TARGET_64BIT ? TImode : DImode))
12328 if (SCALAR_FLOAT_MODE_P (mode))
12330 rtx second_test = NULL, bypass_test = NULL;
12331 rtx compare_op, compare_seq;
12333 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12335 /* Shortcut: following common codes never translate
12336 into carry flag compares. */
12337 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12338 || code == ORDERED || code == UNORDERED)
12341 /* These comparisons require zero flag; swap operands so they won't. */
12342 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12343 && !TARGET_IEEE_FP)
12348 code = swap_condition (code);
12351 /* Try to expand the comparison and verify that we end up with
12352 carry flag based comparison. This fails to be true only when
12353 we decide to expand comparison using arithmetic that is not
12354 too common scenario. */
12356 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12357 &second_test, &bypass_test);
12358 compare_seq = get_insns ();
12361 if (second_test || bypass_test)
12364 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12365 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12366 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12368 code = GET_CODE (compare_op);
12370 if (code != LTU && code != GEU)
12373 emit_insn (compare_seq);
12378 if (!INTEGRAL_MODE_P (mode))
12387 /* Convert a==0 into (unsigned)a<1. */
12390 if (op1 != const0_rtx)
12393 code = (code == EQ ? LTU : GEU);
12396 /* Convert a>b into b<a or a>=b-1. */
12399 if (CONST_INT_P (op1))
12401 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12402 /* Bail out on overflow. We still can swap operands but that
12403 would force loading of the constant into register. */
12404 if (op1 == const0_rtx
12405 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12407 code = (code == GTU ? GEU : LTU);
12414 code = (code == GTU ? LTU : GEU);
12418 /* Convert a>=0 into (unsigned)a<0x80000000. */
12421 if (mode == DImode || op1 != const0_rtx)
12423 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12424 code = (code == LT ? GEU : LTU);
12428 if (mode == DImode || op1 != constm1_rtx)
12430 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12431 code = (code == LE ? GEU : LTU);
12437 /* Swapping operands may cause constant to appear as first operand. */
12438 if (!nonimmediate_operand (op0, VOIDmode))
12440 if (!can_create_pseudo_p ())
12442 op0 = force_reg (mode, op0);
12444 ix86_compare_op0 = op0;
12445 ix86_compare_op1 = op1;
12446 *pop = ix86_expand_compare (code, NULL, NULL);
12447 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12452 ix86_expand_int_movcc (rtx operands[])
12454 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12455 rtx compare_seq, compare_op;
12456 rtx second_test, bypass_test;
12457 enum machine_mode mode = GET_MODE (operands[0]);
12458 bool sign_bit_compare_p = false;;
12461 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12462 compare_seq = get_insns ();
12465 compare_code = GET_CODE (compare_op);
12467 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12468 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12469 sign_bit_compare_p = true;
12471 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12472 HImode insns, we'd be swallowed in word prefix ops. */
12474 if ((mode != HImode || TARGET_FAST_PREFIX)
12475 && (mode != (TARGET_64BIT ? TImode : DImode))
12476 && CONST_INT_P (operands[2])
12477 && CONST_INT_P (operands[3]))
12479 rtx out = operands[0];
12480 HOST_WIDE_INT ct = INTVAL (operands[2]);
12481 HOST_WIDE_INT cf = INTVAL (operands[3]);
12482 HOST_WIDE_INT diff;
12485 /* Sign bit compares are better done using shifts than we do by using
12487 if (sign_bit_compare_p
12488 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12489 ix86_compare_op1, &compare_op))
12491 /* Detect overlap between destination and compare sources. */
12494 if (!sign_bit_compare_p)
12496 bool fpcmp = false;
12498 compare_code = GET_CODE (compare_op);
12500 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12501 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12504 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12507 /* To simplify rest of code, restrict to the GEU case. */
12508 if (compare_code == LTU)
12510 HOST_WIDE_INT tmp = ct;
12513 compare_code = reverse_condition (compare_code);
12514 code = reverse_condition (code);
12519 PUT_CODE (compare_op,
12520 reverse_condition_maybe_unordered
12521 (GET_CODE (compare_op)));
12523 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12527 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12528 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12529 tmp = gen_reg_rtx (mode);
12531 if (mode == DImode)
12532 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12534 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12538 if (code == GT || code == GE)
12539 code = reverse_condition (code);
12542 HOST_WIDE_INT tmp = ct;
12547 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12548 ix86_compare_op1, VOIDmode, 0, -1);
12561 tmp = expand_simple_binop (mode, PLUS,
12563 copy_rtx (tmp), 1, OPTAB_DIRECT);
12574 tmp = expand_simple_binop (mode, IOR,
12576 copy_rtx (tmp), 1, OPTAB_DIRECT);
12578 else if (diff == -1 && ct)
12588 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12590 tmp = expand_simple_binop (mode, PLUS,
12591 copy_rtx (tmp), GEN_INT (cf),
12592 copy_rtx (tmp), 1, OPTAB_DIRECT);
12600 * andl cf - ct, dest
12610 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12613 tmp = expand_simple_binop (mode, AND,
12615 gen_int_mode (cf - ct, mode),
12616 copy_rtx (tmp), 1, OPTAB_DIRECT);
12618 tmp = expand_simple_binop (mode, PLUS,
12619 copy_rtx (tmp), GEN_INT (ct),
12620 copy_rtx (tmp), 1, OPTAB_DIRECT);
12623 if (!rtx_equal_p (tmp, out))
12624 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12626 return 1; /* DONE */
12631 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12634 tmp = ct, ct = cf, cf = tmp;
12637 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12639 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12641 /* We may be reversing unordered compare to normal compare, that
12642 is not valid in general (we may convert non-trapping condition
12643 to trapping one), however on i386 we currently emit all
12644 comparisons unordered. */
12645 compare_code = reverse_condition_maybe_unordered (compare_code);
12646 code = reverse_condition_maybe_unordered (code);
12650 compare_code = reverse_condition (compare_code);
12651 code = reverse_condition (code);
12655 compare_code = UNKNOWN;
12656 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12657 && CONST_INT_P (ix86_compare_op1))
12659 if (ix86_compare_op1 == const0_rtx
12660 && (code == LT || code == GE))
12661 compare_code = code;
12662 else if (ix86_compare_op1 == constm1_rtx)
12666 else if (code == GT)
12671 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12672 if (compare_code != UNKNOWN
12673 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12674 && (cf == -1 || ct == -1))
12676 /* If lea code below could be used, only optimize
12677 if it results in a 2 insn sequence. */
12679 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12680 || diff == 3 || diff == 5 || diff == 9)
12681 || (compare_code == LT && ct == -1)
12682 || (compare_code == GE && cf == -1))
12685 * notl op1 (if necessary)
12693 code = reverse_condition (code);
12696 out = emit_store_flag (out, code, ix86_compare_op0,
12697 ix86_compare_op1, VOIDmode, 0, -1);
12699 out = expand_simple_binop (mode, IOR,
12701 out, 1, OPTAB_DIRECT);
12702 if (out != operands[0])
12703 emit_move_insn (operands[0], out);
12705 return 1; /* DONE */
12710 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12711 || diff == 3 || diff == 5 || diff == 9)
12712 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12714 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12720 * lea cf(dest*(ct-cf)),dest
12724 * This also catches the degenerate setcc-only case.
12730 out = emit_store_flag (out, code, ix86_compare_op0,
12731 ix86_compare_op1, VOIDmode, 0, 1);
12734 /* On x86_64 the lea instruction operates on Pmode, so we need
12735 to get arithmetics done in proper mode to match. */
12737 tmp = copy_rtx (out);
12741 out1 = copy_rtx (out);
12742 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12746 tmp = gen_rtx_PLUS (mode, tmp, out1);
12752 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12755 if (!rtx_equal_p (tmp, out))
12758 out = force_operand (tmp, copy_rtx (out));
12760 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12762 if (!rtx_equal_p (out, operands[0]))
12763 emit_move_insn (operands[0], copy_rtx (out));
12765 return 1; /* DONE */
12769 * General case: Jumpful:
12770 * xorl dest,dest cmpl op1, op2
12771 * cmpl op1, op2 movl ct, dest
12772 * setcc dest jcc 1f
12773 * decl dest movl cf, dest
12774 * andl (cf-ct),dest 1:
12777 * Size 20. Size 14.
12779 * This is reasonably steep, but branch mispredict costs are
12780 * high on modern cpus, so consider failing only if optimizing
12784 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12785 && BRANCH_COST >= 2)
12789 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12794 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12796 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12798 /* We may be reversing unordered compare to normal compare,
12799 that is not valid in general (we may convert non-trapping
12800 condition to trapping one), however on i386 we currently
12801 emit all comparisons unordered. */
12802 code = reverse_condition_maybe_unordered (code);
12806 code = reverse_condition (code);
12807 if (compare_code != UNKNOWN)
12808 compare_code = reverse_condition (compare_code);
12812 if (compare_code != UNKNOWN)
12814 /* notl op1 (if needed)
12819 For x < 0 (resp. x <= -1) there will be no notl,
12820 so if possible swap the constants to get rid of the
12822 True/false will be -1/0 while code below (store flag
12823 followed by decrement) is 0/-1, so the constants need
12824 to be exchanged once more. */
12826 if (compare_code == GE || !cf)
12828 code = reverse_condition (code);
12833 HOST_WIDE_INT tmp = cf;
12838 out = emit_store_flag (out, code, ix86_compare_op0,
12839 ix86_compare_op1, VOIDmode, 0, -1);
12843 out = emit_store_flag (out, code, ix86_compare_op0,
12844 ix86_compare_op1, VOIDmode, 0, 1);
12846 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12847 copy_rtx (out), 1, OPTAB_DIRECT);
12850 out = expand_simple_binop (mode, AND, copy_rtx (out),
12851 gen_int_mode (cf - ct, mode),
12852 copy_rtx (out), 1, OPTAB_DIRECT);
12854 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12855 copy_rtx (out), 1, OPTAB_DIRECT);
12856 if (!rtx_equal_p (out, operands[0]))
12857 emit_move_insn (operands[0], copy_rtx (out));
12859 return 1; /* DONE */
12863 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12865 /* Try a few things more with specific constants and a variable. */
12868 rtx var, orig_out, out, tmp;
12870 if (BRANCH_COST <= 2)
12871 return 0; /* FAIL */
12873 /* If one of the two operands is an interesting constant, load a
12874 constant with the above and mask it in with a logical operation. */
12876 if (CONST_INT_P (operands[2]))
12879 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12880 operands[3] = constm1_rtx, op = and_optab;
12881 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12882 operands[3] = const0_rtx, op = ior_optab;
12884 return 0; /* FAIL */
12886 else if (CONST_INT_P (operands[3]))
12889 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12890 operands[2] = constm1_rtx, op = and_optab;
12891 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12892 operands[2] = const0_rtx, op = ior_optab;
12894 return 0; /* FAIL */
12897 return 0; /* FAIL */
12899 orig_out = operands[0];
12900 tmp = gen_reg_rtx (mode);
12903 /* Recurse to get the constant loaded. */
12904 if (ix86_expand_int_movcc (operands) == 0)
12905 return 0; /* FAIL */
12907 /* Mask in the interesting variable. */
12908 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12910 if (!rtx_equal_p (out, orig_out))
12911 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12913 return 1; /* DONE */
12917 * For comparison with above,
12927 if (! nonimmediate_operand (operands[2], mode))
12928 operands[2] = force_reg (mode, operands[2]);
12929 if (! nonimmediate_operand (operands[3], mode))
12930 operands[3] = force_reg (mode, operands[3]);
12932 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12934 rtx tmp = gen_reg_rtx (mode);
12935 emit_move_insn (tmp, operands[3]);
12938 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12940 rtx tmp = gen_reg_rtx (mode);
12941 emit_move_insn (tmp, operands[2]);
12945 if (! register_operand (operands[2], VOIDmode)
12947 || ! register_operand (operands[3], VOIDmode)))
12948 operands[2] = force_reg (mode, operands[2]);
12951 && ! register_operand (operands[3], VOIDmode))
12952 operands[3] = force_reg (mode, operands[3]);
12954 emit_insn (compare_seq);
12955 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12956 gen_rtx_IF_THEN_ELSE (mode,
12957 compare_op, operands[2],
12960 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12961 gen_rtx_IF_THEN_ELSE (mode,
12963 copy_rtx (operands[3]),
12964 copy_rtx (operands[0]))));
12966 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12967 gen_rtx_IF_THEN_ELSE (mode,
12969 copy_rtx (operands[2]),
12970 copy_rtx (operands[0]))));
12972 return 1; /* DONE */
12975 /* Swap, force into registers, or otherwise massage the two operands
12976 to an sse comparison with a mask result. Thus we differ a bit from
12977 ix86_prepare_fp_compare_args which expects to produce a flags result.
12979 The DEST operand exists to help determine whether to commute commutative
12980 operators. The POP0/POP1 operands are updated in place. The new
12981 comparison code is returned, or UNKNOWN if not implementable. */
12983 static enum rtx_code
12984 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12985 rtx *pop0, rtx *pop1)
12993 /* We have no LTGT as an operator. We could implement it with
12994 NE & ORDERED, but this requires an extra temporary. It's
12995 not clear that it's worth it. */
13002 /* These are supported directly. */
13009 /* For commutative operators, try to canonicalize the destination
13010 operand to be first in the comparison - this helps reload to
13011 avoid extra moves. */
13012 if (!dest || !rtx_equal_p (dest, *pop1))
13020 /* These are not supported directly. Swap the comparison operands
13021 to transform into something that is supported. */
13025 code = swap_condition (code);
13029 gcc_unreachable ();
13035 /* Detect conditional moves that exactly match min/max operational
13036 semantics. Note that this is IEEE safe, as long as we don't
13037 interchange the operands.
13039 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13040 and TRUE if the operation is successful and instructions are emitted. */
13043 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13044 rtx cmp_op1, rtx if_true, rtx if_false)
13046 enum machine_mode mode;
13052 else if (code == UNGE)
13055 if_true = if_false;
13061 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13063 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13068 mode = GET_MODE (dest);
13070 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13071 but MODE may be a vector mode and thus not appropriate. */
13072 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13074 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13077 if_true = force_reg (mode, if_true);
13078 v = gen_rtvec (2, if_true, if_false);
13079 tmp = gen_rtx_UNSPEC (mode, v, u);
13083 code = is_min ? SMIN : SMAX;
13084 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13087 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13091 /* Expand an sse vector comparison. Return the register with the result. */
13094 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13095 rtx op_true, rtx op_false)
13097 enum machine_mode mode = GET_MODE (dest);
13100 cmp_op0 = force_reg (mode, cmp_op0);
13101 if (!nonimmediate_operand (cmp_op1, mode))
13102 cmp_op1 = force_reg (mode, cmp_op1);
13105 || reg_overlap_mentioned_p (dest, op_true)
13106 || reg_overlap_mentioned_p (dest, op_false))
13107 dest = gen_reg_rtx (mode);
13109 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13110 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13115 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13116 operations. This is used for both scalar and vector conditional moves. */
13119 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13121 enum machine_mode mode = GET_MODE (dest);
13126 rtx pcmov = gen_rtx_SET (mode, dest,
13127 gen_rtx_IF_THEN_ELSE (mode, cmp,
13132 else if (op_false == CONST0_RTX (mode))
13134 op_true = force_reg (mode, op_true);
13135 x = gen_rtx_AND (mode, cmp, op_true);
13136 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13138 else if (op_true == CONST0_RTX (mode))
13140 op_false = force_reg (mode, op_false);
13141 x = gen_rtx_NOT (mode, cmp);
13142 x = gen_rtx_AND (mode, x, op_false);
13143 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13147 op_true = force_reg (mode, op_true);
13148 op_false = force_reg (mode, op_false);
13150 t2 = gen_reg_rtx (mode);
13152 t3 = gen_reg_rtx (mode);
13156 x = gen_rtx_AND (mode, op_true, cmp);
13157 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13159 x = gen_rtx_NOT (mode, cmp);
13160 x = gen_rtx_AND (mode, x, op_false);
13161 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13163 x = gen_rtx_IOR (mode, t3, t2);
13164 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13168 /* Expand a floating-point conditional move. Return true if successful. */
13171 ix86_expand_fp_movcc (rtx operands[])
13173 enum machine_mode mode = GET_MODE (operands[0]);
13174 enum rtx_code code = GET_CODE (operands[1]);
13175 rtx tmp, compare_op, second_test, bypass_test;
13177 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13179 enum machine_mode cmode;
13181 /* Since we've no cmove for sse registers, don't force bad register
13182 allocation just to gain access to it. Deny movcc when the
13183 comparison mode doesn't match the move mode. */
13184 cmode = GET_MODE (ix86_compare_op0);
13185 if (cmode == VOIDmode)
13186 cmode = GET_MODE (ix86_compare_op1);
13190 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13192 &ix86_compare_op1);
13193 if (code == UNKNOWN)
13196 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13197 ix86_compare_op1, operands[2],
13201 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13202 ix86_compare_op1, operands[2], operands[3]);
13203 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13207 /* The floating point conditional move instructions don't directly
13208 support conditions resulting from a signed integer comparison. */
13210 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13212 /* The floating point conditional move instructions don't directly
13213 support signed integer comparisons. */
13215 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13217 gcc_assert (!second_test && !bypass_test);
13218 tmp = gen_reg_rtx (QImode);
13219 ix86_expand_setcc (code, tmp);
13221 ix86_compare_op0 = tmp;
13222 ix86_compare_op1 = const0_rtx;
13223 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13225 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13227 tmp = gen_reg_rtx (mode);
13228 emit_move_insn (tmp, operands[3]);
13231 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13233 tmp = gen_reg_rtx (mode);
13234 emit_move_insn (tmp, operands[2]);
13238 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13239 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13240 operands[2], operands[3])));
13242 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13243 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13244 operands[3], operands[0])));
13246 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13247 gen_rtx_IF_THEN_ELSE (mode, second_test,
13248 operands[2], operands[0])));
13253 /* Expand a floating-point vector conditional move; a vcond operation
13254 rather than a movcc operation. */
13257 ix86_expand_fp_vcond (rtx operands[])
13259 enum rtx_code code = GET_CODE (operands[3]);
13262 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13263 &operands[4], &operands[5]);
13264 if (code == UNKNOWN)
13267 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13268 operands[5], operands[1], operands[2]))
13271 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13272 operands[1], operands[2]);
13273 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13277 /* Expand a signed/unsigned integral vector conditional move. */
13280 ix86_expand_int_vcond (rtx operands[])
13282 enum machine_mode mode = GET_MODE (operands[0]);
13283 enum rtx_code code = GET_CODE (operands[3]);
13284 bool negate = false;
13287 cop0 = operands[4];
13288 cop1 = operands[5];
13290 /* Canonicalize the comparison to EQ, GT, GTU. */
13301 code = reverse_condition (code);
13307 code = reverse_condition (code);
13313 code = swap_condition (code);
13314 x = cop0, cop0 = cop1, cop1 = x;
13318 gcc_unreachable ();
13321 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13322 if (mode == V2DImode)
13327 /* SSE4.1 supports EQ. */
13328 if (!TARGET_SSE4_1)
13334 /* SSE4.2 supports GT/GTU. */
13335 if (!TARGET_SSE4_2)
13340 gcc_unreachable ();
13344 /* Unsigned parallel compare is not supported by the hardware. Play some
13345 tricks to turn this into a signed comparison against 0. */
13348 cop0 = force_reg (mode, cop0);
13357 /* Perform a parallel modulo subtraction. */
13358 t1 = gen_reg_rtx (mode);
13359 emit_insn ((mode == V4SImode
13361 : gen_subv2di3) (t1, cop0, cop1));
13363 /* Extract the original sign bit of op0. */
13364 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13366 t2 = gen_reg_rtx (mode);
13367 emit_insn ((mode == V4SImode
13369 : gen_andv2di3) (t2, cop0, mask));
13371 /* XOR it back into the result of the subtraction. This results
13372 in the sign bit set iff we saw unsigned underflow. */
13373 x = gen_reg_rtx (mode);
13374 emit_insn ((mode == V4SImode
13376 : gen_xorv2di3) (x, t1, t2));
13384 /* Perform a parallel unsigned saturating subtraction. */
13385 x = gen_reg_rtx (mode);
13386 emit_insn (gen_rtx_SET (VOIDmode, x,
13387 gen_rtx_US_MINUS (mode, cop0, cop1)));
13394 gcc_unreachable ();
13398 cop1 = CONST0_RTX (mode);
13401 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13402 operands[1+negate], operands[2-negate]);
13404 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13405 operands[2-negate]);
13409 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13410 true if we should do zero extension, else sign extension. HIGH_P is
13411 true if we want the N/2 high elements, else the low elements. */
13414 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13416 enum machine_mode imode = GET_MODE (operands[1]);
13417 rtx (*unpack)(rtx, rtx, rtx);
13424 unpack = gen_vec_interleave_highv16qi;
13426 unpack = gen_vec_interleave_lowv16qi;
13430 unpack = gen_vec_interleave_highv8hi;
13432 unpack = gen_vec_interleave_lowv8hi;
13436 unpack = gen_vec_interleave_highv4si;
13438 unpack = gen_vec_interleave_lowv4si;
13441 gcc_unreachable ();
13444 dest = gen_lowpart (imode, operands[0]);
13447 se = force_reg (imode, CONST0_RTX (imode));
13449 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13450 operands[1], pc_rtx, pc_rtx);
13452 emit_insn (unpack (dest, operands[1], se));
13455 /* This function performs the same task as ix86_expand_sse_unpack,
13456 but with SSE4.1 instructions. */
13459 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13461 enum machine_mode imode = GET_MODE (operands[1]);
13462 rtx (*unpack)(rtx, rtx);
13469 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13471 unpack = gen_sse4_1_extendv8qiv8hi2;
13475 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13477 unpack = gen_sse4_1_extendv4hiv4si2;
13481 unpack = gen_sse4_1_zero_extendv2siv2di2;
13483 unpack = gen_sse4_1_extendv2siv2di2;
13486 gcc_unreachable ();
13489 dest = operands[0];
13492 /* Shift higher 8 bytes to lower 8 bytes. */
13493 src = gen_reg_rtx (imode);
13494 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13495 gen_lowpart (TImode, operands[1]),
13501 emit_insn (unpack (dest, src));
13504 /* This function performs the same task as ix86_expand_sse_unpack,
13505 but with amdfam15 instructions. */
13507 #define PPERM_SRC 0x00 /* copy source */
13508 #define PPERM_INVERT 0x20 /* invert source */
13509 #define PPERM_REVERSE 0x40 /* bit reverse source */
13510 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13511 #define PPERM_ZERO 0x80 /* all 0's */
13512 #define PPERM_ONES 0xa0 /* all 1's */
13513 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13514 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13516 #define PPERM_SRC1 0x00 /* use first source byte */
13517 #define PPERM_SRC2 0x10 /* use second source byte */
13520 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13522 enum machine_mode imode = GET_MODE (operands[1]);
13523 int pperm_bytes[16];
13525 int h = (high_p) ? 8 : 0;
13528 rtvec v = rtvec_alloc (16);
13531 rtx op0 = operands[0], op1 = operands[1];
13536 vs = rtvec_alloc (8);
13537 h2 = (high_p) ? 8 : 0;
13538 for (i = 0; i < 8; i++)
13540 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13541 pperm_bytes[2*i+1] = ((unsigned_p)
13543 : PPERM_SIGN | PPERM_SRC2 | i | h);
13546 for (i = 0; i < 16; i++)
13547 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13549 for (i = 0; i < 8; i++)
13550 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13552 p = gen_rtx_PARALLEL (VOIDmode, vs);
13553 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13555 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13557 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13561 vs = rtvec_alloc (4);
13562 h2 = (high_p) ? 4 : 0;
13563 for (i = 0; i < 4; i++)
13565 sign_extend = ((unsigned_p)
13567 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13568 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13569 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13570 pperm_bytes[4*i+2] = sign_extend;
13571 pperm_bytes[4*i+3] = sign_extend;
13574 for (i = 0; i < 16; i++)
13575 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13577 for (i = 0; i < 4; i++)
13578 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13580 p = gen_rtx_PARALLEL (VOIDmode, vs);
13581 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13583 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13585 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13589 vs = rtvec_alloc (2);
13590 h2 = (high_p) ? 2 : 0;
13591 for (i = 0; i < 2; i++)
13593 sign_extend = ((unsigned_p)
13595 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13596 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13597 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13598 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13599 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13600 pperm_bytes[8*i+4] = sign_extend;
13601 pperm_bytes[8*i+5] = sign_extend;
13602 pperm_bytes[8*i+6] = sign_extend;
13603 pperm_bytes[8*i+7] = sign_extend;
13606 for (i = 0; i < 16; i++)
13607 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13609 for (i = 0; i < 2; i++)
13610 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13612 p = gen_rtx_PARALLEL (VOIDmode, vs);
13613 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13615 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13617 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13621 gcc_unreachable ();
13627 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13628 next narrower integer vector type */
13630 ix86_expand_sse5_pack (rtx operands[3])
13632 enum machine_mode imode = GET_MODE (operands[0]);
13633 int pperm_bytes[16];
13635 rtvec v = rtvec_alloc (16);
13637 rtx op0 = operands[0];
13638 rtx op1 = operands[1];
13639 rtx op2 = operands[2];
13644 for (i = 0; i < 8; i++)
13646 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13647 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13650 for (i = 0; i < 16; i++)
13651 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13653 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13654 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13658 for (i = 0; i < 4; i++)
13660 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13661 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13662 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13663 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13666 for (i = 0; i < 16; i++)
13667 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13669 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13670 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13674 for (i = 0; i < 2; i++)
13676 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13677 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13678 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13679 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13680 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13681 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13682 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13683 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13686 for (i = 0; i < 16; i++)
13687 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13689 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13690 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13694 gcc_unreachable ();
13700 /* Expand conditional increment or decrement using adb/sbb instructions.
13701 The default case using setcc followed by the conditional move can be
13702 done by generic code. */
13704 ix86_expand_int_addcc (rtx operands[])
13706 enum rtx_code code = GET_CODE (operands[1]);
13708 rtx val = const0_rtx;
13709 bool fpcmp = false;
13710 enum machine_mode mode = GET_MODE (operands[0]);
13712 if (operands[3] != const1_rtx
13713 && operands[3] != constm1_rtx)
13715 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13716 ix86_compare_op1, &compare_op))
13718 code = GET_CODE (compare_op);
13720 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13721 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13724 code = ix86_fp_compare_code_to_integer (code);
13731 PUT_CODE (compare_op,
13732 reverse_condition_maybe_unordered
13733 (GET_CODE (compare_op)));
13735 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13737 PUT_MODE (compare_op, mode);
13739 /* Construct either adc or sbb insn. */
13740 if ((code == LTU) == (operands[3] == constm1_rtx))
13742 switch (GET_MODE (operands[0]))
13745 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13748 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13751 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13754 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13757 gcc_unreachable ();
13762 switch (GET_MODE (operands[0]))
13765 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13768 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13771 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13774 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13777 gcc_unreachable ();
13780 return 1; /* DONE */
13784 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13785 works for floating pointer parameters and nonoffsetable memories.
13786 For pushes, it returns just stack offsets; the values will be saved
13787 in the right order. Maximally three parts are generated. */
13790 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13795 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13797 size = (GET_MODE_SIZE (mode) + 4) / 8;
13799 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13800 gcc_assert (size >= 2 && size <= 3);
13802 /* Optimize constant pool reference to immediates. This is used by fp
13803 moves, that force all constants to memory to allow combining. */
13804 if (MEM_P (operand) && MEM_READONLY_P (operand))
13806 rtx tmp = maybe_get_pool_constant (operand);
13811 if (MEM_P (operand) && !offsettable_memref_p (operand))
13813 /* The only non-offsetable memories we handle are pushes. */
13814 int ok = push_operand (operand, VOIDmode);
13818 operand = copy_rtx (operand);
13819 PUT_MODE (operand, Pmode);
13820 parts[0] = parts[1] = parts[2] = operand;
13824 if (GET_CODE (operand) == CONST_VECTOR)
13826 enum machine_mode imode = int_mode_for_mode (mode);
13827 /* Caution: if we looked through a constant pool memory above,
13828 the operand may actually have a different mode now. That's
13829 ok, since we want to pun this all the way back to an integer. */
13830 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13831 gcc_assert (operand != NULL);
13837 if (mode == DImode)
13838 split_di (&operand, 1, &parts[0], &parts[1]);
13841 if (REG_P (operand))
13843 gcc_assert (reload_completed);
13844 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13845 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13847 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13849 else if (offsettable_memref_p (operand))
13851 operand = adjust_address (operand, SImode, 0);
13852 parts[0] = operand;
13853 parts[1] = adjust_address (operand, SImode, 4);
13855 parts[2] = adjust_address (operand, SImode, 8);
13857 else if (GET_CODE (operand) == CONST_DOUBLE)
13862 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13866 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13867 parts[2] = gen_int_mode (l[2], SImode);
13870 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13873 gcc_unreachable ();
13875 parts[1] = gen_int_mode (l[1], SImode);
13876 parts[0] = gen_int_mode (l[0], SImode);
13879 gcc_unreachable ();
13884 if (mode == TImode)
13885 split_ti (&operand, 1, &parts[0], &parts[1]);
13886 if (mode == XFmode || mode == TFmode)
13888 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13889 if (REG_P (operand))
13891 gcc_assert (reload_completed);
13892 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13893 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13895 else if (offsettable_memref_p (operand))
13897 operand = adjust_address (operand, DImode, 0);
13898 parts[0] = operand;
13899 parts[1] = adjust_address (operand, upper_mode, 8);
13901 else if (GET_CODE (operand) == CONST_DOUBLE)
13906 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13907 real_to_target (l, &r, mode);
13909 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13910 if (HOST_BITS_PER_WIDE_INT >= 64)
13913 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13914 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13917 parts[0] = immed_double_const (l[0], l[1], DImode);
13919 if (upper_mode == SImode)
13920 parts[1] = gen_int_mode (l[2], SImode);
13921 else if (HOST_BITS_PER_WIDE_INT >= 64)
13924 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13925 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13928 parts[1] = immed_double_const (l[2], l[3], DImode);
13931 gcc_unreachable ();
13938 /* Emit insns to perform a move or push of DI, DF, and XF values.
13939 Return false when normal moves are needed; true when all required
13940 insns have been emitted. Operands 2-4 contain the input values
13941 int the correct order; operands 5-7 contain the output values. */
13944 ix86_split_long_move (rtx operands[])
13949 int collisions = 0;
13950 enum machine_mode mode = GET_MODE (operands[0]);
13952 /* The DFmode expanders may ask us to move double.
13953 For 64bit target this is single move. By hiding the fact
13954 here we simplify i386.md splitters. */
13955 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13957 /* Optimize constant pool reference to immediates. This is used by
13958 fp moves, that force all constants to memory to allow combining. */
13960 if (MEM_P (operands[1])
13961 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13962 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13963 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13964 if (push_operand (operands[0], VOIDmode))
13966 operands[0] = copy_rtx (operands[0]);
13967 PUT_MODE (operands[0], Pmode);
13970 operands[0] = gen_lowpart (DImode, operands[0]);
13971 operands[1] = gen_lowpart (DImode, operands[1]);
13972 emit_move_insn (operands[0], operands[1]);
13976 /* The only non-offsettable memory we handle is push. */
13977 if (push_operand (operands[0], VOIDmode))
13980 gcc_assert (!MEM_P (operands[0])
13981 || offsettable_memref_p (operands[0]));
13983 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13984 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13986 /* When emitting push, take care for source operands on the stack. */
13987 if (push && MEM_P (operands[1])
13988 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13991 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13992 XEXP (part[1][2], 0));
13993 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13994 XEXP (part[1][1], 0));
13997 /* We need to do copy in the right order in case an address register
13998 of the source overlaps the destination. */
13999 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14001 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14003 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14006 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14009 /* Collision in the middle part can be handled by reordering. */
14010 if (collisions == 1 && nparts == 3
14011 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14014 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14015 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14018 /* If there are more collisions, we can't handle it by reordering.
14019 Do an lea to the last part and use only one colliding move. */
14020 else if (collisions > 1)
14026 base = part[0][nparts - 1];
14028 /* Handle the case when the last part isn't valid for lea.
14029 Happens in 64-bit mode storing the 12-byte XFmode. */
14030 if (GET_MODE (base) != Pmode)
14031 base = gen_rtx_REG (Pmode, REGNO (base));
14033 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14034 part[1][0] = replace_equiv_address (part[1][0], base);
14035 part[1][1] = replace_equiv_address (part[1][1],
14036 plus_constant (base, UNITS_PER_WORD));
14038 part[1][2] = replace_equiv_address (part[1][2],
14039 plus_constant (base, 8));
14049 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14050 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14051 emit_move_insn (part[0][2], part[1][2]);
14056 /* In 64bit mode we don't have 32bit push available. In case this is
14057 register, it is OK - we will just use larger counterpart. We also
14058 retype memory - these comes from attempt to avoid REX prefix on
14059 moving of second half of TFmode value. */
14060 if (GET_MODE (part[1][1]) == SImode)
14062 switch (GET_CODE (part[1][1]))
14065 part[1][1] = adjust_address (part[1][1], DImode, 0);
14069 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14073 gcc_unreachable ();
14076 if (GET_MODE (part[1][0]) == SImode)
14077 part[1][0] = part[1][1];
14080 emit_move_insn (part[0][1], part[1][1]);
14081 emit_move_insn (part[0][0], part[1][0]);
14085 /* Choose correct order to not overwrite the source before it is copied. */
14086 if ((REG_P (part[0][0])
14087 && REG_P (part[1][1])
14088 && (REGNO (part[0][0]) == REGNO (part[1][1])
14090 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14092 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14096 operands[2] = part[0][2];
14097 operands[3] = part[0][1];
14098 operands[4] = part[0][0];
14099 operands[5] = part[1][2];
14100 operands[6] = part[1][1];
14101 operands[7] = part[1][0];
14105 operands[2] = part[0][1];
14106 operands[3] = part[0][0];
14107 operands[5] = part[1][1];
14108 operands[6] = part[1][0];
14115 operands[2] = part[0][0];
14116 operands[3] = part[0][1];
14117 operands[4] = part[0][2];
14118 operands[5] = part[1][0];
14119 operands[6] = part[1][1];
14120 operands[7] = part[1][2];
14124 operands[2] = part[0][0];
14125 operands[3] = part[0][1];
14126 operands[5] = part[1][0];
14127 operands[6] = part[1][1];
14131 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14134 if (CONST_INT_P (operands[5])
14135 && operands[5] != const0_rtx
14136 && REG_P (operands[2]))
14138 if (CONST_INT_P (operands[6])
14139 && INTVAL (operands[6]) == INTVAL (operands[5]))
14140 operands[6] = operands[2];
14143 && CONST_INT_P (operands[7])
14144 && INTVAL (operands[7]) == INTVAL (operands[5]))
14145 operands[7] = operands[2];
14149 && CONST_INT_P (operands[6])
14150 && operands[6] != const0_rtx
14151 && REG_P (operands[3])
14152 && CONST_INT_P (operands[7])
14153 && INTVAL (operands[7]) == INTVAL (operands[6]))
14154 operands[7] = operands[3];
14157 emit_move_insn (operands[2], operands[5]);
14158 emit_move_insn (operands[3], operands[6]);
14160 emit_move_insn (operands[4], operands[7]);
14165 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14166 left shift by a constant, either using a single shift or
14167 a sequence of add instructions. */
14170 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14174 emit_insn ((mode == DImode
14176 : gen_adddi3) (operand, operand, operand));
14178 else if (!optimize_size
14179 && count * ix86_cost->add <= ix86_cost->shift_const)
14182 for (i=0; i<count; i++)
14184 emit_insn ((mode == DImode
14186 : gen_adddi3) (operand, operand, operand));
14190 emit_insn ((mode == DImode
14192 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14196 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14198 rtx low[2], high[2];
14200 const int single_width = mode == DImode ? 32 : 64;
14202 if (CONST_INT_P (operands[2]))
14204 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14205 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14207 if (count >= single_width)
14209 emit_move_insn (high[0], low[1]);
14210 emit_move_insn (low[0], const0_rtx);
14212 if (count > single_width)
14213 ix86_expand_ashl_const (high[0], count - single_width, mode);
14217 if (!rtx_equal_p (operands[0], operands[1]))
14218 emit_move_insn (operands[0], operands[1]);
14219 emit_insn ((mode == DImode
14221 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14222 ix86_expand_ashl_const (low[0], count, mode);
14227 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14229 if (operands[1] == const1_rtx)
14231 /* Assuming we've chosen a QImode capable registers, then 1 << N
14232 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14233 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14235 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14237 ix86_expand_clear (low[0]);
14238 ix86_expand_clear (high[0]);
14239 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14241 d = gen_lowpart (QImode, low[0]);
14242 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14243 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14244 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14246 d = gen_lowpart (QImode, high[0]);
14247 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14248 s = gen_rtx_NE (QImode, flags, const0_rtx);
14249 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14252 /* Otherwise, we can get the same results by manually performing
14253 a bit extract operation on bit 5/6, and then performing the two
14254 shifts. The two methods of getting 0/1 into low/high are exactly
14255 the same size. Avoiding the shift in the bit extract case helps
14256 pentium4 a bit; no one else seems to care much either way. */
14261 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14262 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14264 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14265 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14267 emit_insn ((mode == DImode
14269 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14270 emit_insn ((mode == DImode
14272 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14273 emit_move_insn (low[0], high[0]);
14274 emit_insn ((mode == DImode
14276 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14279 emit_insn ((mode == DImode
14281 : gen_ashldi3) (low[0], low[0], operands[2]));
14282 emit_insn ((mode == DImode
14284 : gen_ashldi3) (high[0], high[0], operands[2]));
14288 if (operands[1] == constm1_rtx)
14290 /* For -1 << N, we can avoid the shld instruction, because we
14291 know that we're shifting 0...31/63 ones into a -1. */
14292 emit_move_insn (low[0], constm1_rtx);
14294 emit_move_insn (high[0], low[0]);
14296 emit_move_insn (high[0], constm1_rtx);
14300 if (!rtx_equal_p (operands[0], operands[1]))
14301 emit_move_insn (operands[0], operands[1]);
14303 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14304 emit_insn ((mode == DImode
14306 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14309 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14311 if (TARGET_CMOVE && scratch)
14313 ix86_expand_clear (scratch);
14314 emit_insn ((mode == DImode
14315 ? gen_x86_shift_adj_1
14316 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14319 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14323 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14325 rtx low[2], high[2];
14327 const int single_width = mode == DImode ? 32 : 64;
14329 if (CONST_INT_P (operands[2]))
14331 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14332 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14334 if (count == single_width * 2 - 1)
14336 emit_move_insn (high[0], high[1]);
14337 emit_insn ((mode == DImode
14339 : gen_ashrdi3) (high[0], high[0],
14340 GEN_INT (single_width - 1)));
14341 emit_move_insn (low[0], high[0]);
14344 else if (count >= single_width)
14346 emit_move_insn (low[0], high[1]);
14347 emit_move_insn (high[0], low[0]);
14348 emit_insn ((mode == DImode
14350 : gen_ashrdi3) (high[0], high[0],
14351 GEN_INT (single_width - 1)));
14352 if (count > single_width)
14353 emit_insn ((mode == DImode
14355 : gen_ashrdi3) (low[0], low[0],
14356 GEN_INT (count - single_width)));
14360 if (!rtx_equal_p (operands[0], operands[1]))
14361 emit_move_insn (operands[0], operands[1]);
14362 emit_insn ((mode == DImode
14364 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14365 emit_insn ((mode == DImode
14367 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14372 if (!rtx_equal_p (operands[0], operands[1]))
14373 emit_move_insn (operands[0], operands[1]);
14375 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14377 emit_insn ((mode == DImode
14379 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14380 emit_insn ((mode == DImode
14382 : gen_ashrdi3) (high[0], high[0], operands[2]));
14384 if (TARGET_CMOVE && scratch)
14386 emit_move_insn (scratch, high[0]);
14387 emit_insn ((mode == DImode
14389 : gen_ashrdi3) (scratch, scratch,
14390 GEN_INT (single_width - 1)));
14391 emit_insn ((mode == DImode
14392 ? gen_x86_shift_adj_1
14393 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14397 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14402 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14404 rtx low[2], high[2];
14406 const int single_width = mode == DImode ? 32 : 64;
14408 if (CONST_INT_P (operands[2]))
14410 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14411 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14413 if (count >= single_width)
14415 emit_move_insn (low[0], high[1]);
14416 ix86_expand_clear (high[0]);
14418 if (count > single_width)
14419 emit_insn ((mode == DImode
14421 : gen_lshrdi3) (low[0], low[0],
14422 GEN_INT (count - single_width)));
14426 if (!rtx_equal_p (operands[0], operands[1]))
14427 emit_move_insn (operands[0], operands[1]);
14428 emit_insn ((mode == DImode
14430 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14431 emit_insn ((mode == DImode
14433 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14438 if (!rtx_equal_p (operands[0], operands[1]))
14439 emit_move_insn (operands[0], operands[1]);
14441 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14443 emit_insn ((mode == DImode
14445 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14446 emit_insn ((mode == DImode
14448 : gen_lshrdi3) (high[0], high[0], operands[2]));
14450 /* Heh. By reversing the arguments, we can reuse this pattern. */
14451 if (TARGET_CMOVE && scratch)
14453 ix86_expand_clear (scratch);
14454 emit_insn ((mode == DImode
14455 ? gen_x86_shift_adj_1
14456 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14460 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14464 /* Predict just emitted jump instruction to be taken with probability PROB. */
14466 predict_jump (int prob)
14468 rtx insn = get_last_insn ();
14469 gcc_assert (JUMP_P (insn));
14471 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14476 /* Helper function for the string operations below. Dest VARIABLE whether
14477 it is aligned to VALUE bytes. If true, jump to the label. */
14479 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14481 rtx label = gen_label_rtx ();
14482 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14483 if (GET_MODE (variable) == DImode)
14484 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14486 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14487 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14490 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14492 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14496 /* Adjust COUNTER by the VALUE. */
14498 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14500 if (GET_MODE (countreg) == DImode)
14501 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14503 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14506 /* Zero extend possibly SImode EXP to Pmode register. */
14508 ix86_zero_extend_to_Pmode (rtx exp)
14511 if (GET_MODE (exp) == VOIDmode)
14512 return force_reg (Pmode, exp);
14513 if (GET_MODE (exp) == Pmode)
14514 return copy_to_mode_reg (Pmode, exp);
14515 r = gen_reg_rtx (Pmode);
14516 emit_insn (gen_zero_extendsidi2 (r, exp));
14520 /* Divide COUNTREG by SCALE. */
14522 scale_counter (rtx countreg, int scale)
14525 rtx piece_size_mask;
14529 if (CONST_INT_P (countreg))
14530 return GEN_INT (INTVAL (countreg) / scale);
14531 gcc_assert (REG_P (countreg));
14533 piece_size_mask = GEN_INT (scale - 1);
14534 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14535 GEN_INT (exact_log2 (scale)),
14536 NULL, 1, OPTAB_DIRECT);
14540 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14541 DImode for constant loop counts. */
14543 static enum machine_mode
14544 counter_mode (rtx count_exp)
14546 if (GET_MODE (count_exp) != VOIDmode)
14547 return GET_MODE (count_exp);
14548 if (GET_CODE (count_exp) != CONST_INT)
14550 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14555 /* When SRCPTR is non-NULL, output simple loop to move memory
14556 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14557 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14558 equivalent loop to set memory by VALUE (supposed to be in MODE).
14560 The size is rounded down to whole number of chunk size moved at once.
14561 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14565 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14566 rtx destptr, rtx srcptr, rtx value,
14567 rtx count, enum machine_mode mode, int unroll,
14570 rtx out_label, top_label, iter, tmp;
14571 enum machine_mode iter_mode = counter_mode (count);
14572 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14573 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14579 top_label = gen_label_rtx ();
14580 out_label = gen_label_rtx ();
14581 iter = gen_reg_rtx (iter_mode);
14583 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14584 NULL, 1, OPTAB_DIRECT);
14585 /* Those two should combine. */
14586 if (piece_size == const1_rtx)
14588 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14590 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14592 emit_move_insn (iter, const0_rtx);
14594 emit_label (top_label);
14596 tmp = convert_modes (Pmode, iter_mode, iter, true);
14597 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14598 destmem = change_address (destmem, mode, x_addr);
14602 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14603 srcmem = change_address (srcmem, mode, y_addr);
14605 /* When unrolling for chips that reorder memory reads and writes,
14606 we can save registers by using single temporary.
14607 Also using 4 temporaries is overkill in 32bit mode. */
14608 if (!TARGET_64BIT && 0)
14610 for (i = 0; i < unroll; i++)
14615 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14617 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14619 emit_move_insn (destmem, srcmem);
14625 gcc_assert (unroll <= 4);
14626 for (i = 0; i < unroll; i++)
14628 tmpreg[i] = gen_reg_rtx (mode);
14632 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14634 emit_move_insn (tmpreg[i], srcmem);
14636 for (i = 0; i < unroll; i++)
14641 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14643 emit_move_insn (destmem, tmpreg[i]);
14648 for (i = 0; i < unroll; i++)
14652 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14653 emit_move_insn (destmem, value);
14656 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14657 true, OPTAB_LIB_WIDEN);
14659 emit_move_insn (iter, tmp);
14661 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14663 if (expected_size != -1)
14665 expected_size /= GET_MODE_SIZE (mode) * unroll;
14666 if (expected_size == 0)
14668 else if (expected_size > REG_BR_PROB_BASE)
14669 predict_jump (REG_BR_PROB_BASE - 1);
14671 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14674 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14675 iter = ix86_zero_extend_to_Pmode (iter);
14676 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14677 true, OPTAB_LIB_WIDEN);
14678 if (tmp != destptr)
14679 emit_move_insn (destptr, tmp);
14682 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14683 true, OPTAB_LIB_WIDEN);
14685 emit_move_insn (srcptr, tmp);
14687 emit_label (out_label);
14690 /* Output "rep; mov" instruction.
14691 Arguments have same meaning as for previous function */
14693 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14694 rtx destptr, rtx srcptr,
14696 enum machine_mode mode)
14702 /* If the size is known, it is shorter to use rep movs. */
14703 if (mode == QImode && CONST_INT_P (count)
14704 && !(INTVAL (count) & 3))
14707 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14708 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14709 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14710 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14711 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14712 if (mode != QImode)
14714 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14715 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14716 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14717 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14718 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14719 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14723 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14724 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14726 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14730 /* Output "rep; stos" instruction.
14731 Arguments have same meaning as for previous function */
14733 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14735 enum machine_mode mode)
14740 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14741 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14742 value = force_reg (mode, gen_lowpart (mode, value));
14743 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14744 if (mode != QImode)
14746 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14747 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14748 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14751 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14752 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14756 emit_strmov (rtx destmem, rtx srcmem,
14757 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14759 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14760 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14761 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14764 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14766 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14767 rtx destptr, rtx srcptr, rtx count, int max_size)
14770 if (CONST_INT_P (count))
14772 HOST_WIDE_INT countval = INTVAL (count);
14775 if ((countval & 0x10) && max_size > 16)
14779 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14780 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14783 gcc_unreachable ();
14786 if ((countval & 0x08) && max_size > 8)
14789 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14792 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14793 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14797 if ((countval & 0x04) && max_size > 4)
14799 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14802 if ((countval & 0x02) && max_size > 2)
14804 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14807 if ((countval & 0x01) && max_size > 1)
14809 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14816 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14817 count, 1, OPTAB_DIRECT);
14818 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14819 count, QImode, 1, 4);
14823 /* When there are stringops, we can cheaply increase dest and src pointers.
14824 Otherwise we save code size by maintaining offset (zero is readily
14825 available from preceding rep operation) and using x86 addressing modes.
14827 if (TARGET_SINGLE_STRINGOP)
14831 rtx label = ix86_expand_aligntest (count, 4, true);
14832 src = change_address (srcmem, SImode, srcptr);
14833 dest = change_address (destmem, SImode, destptr);
14834 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14835 emit_label (label);
14836 LABEL_NUSES (label) = 1;
14840 rtx label = ix86_expand_aligntest (count, 2, true);
14841 src = change_address (srcmem, HImode, srcptr);
14842 dest = change_address (destmem, HImode, destptr);
14843 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14844 emit_label (label);
14845 LABEL_NUSES (label) = 1;
14849 rtx label = ix86_expand_aligntest (count, 1, true);
14850 src = change_address (srcmem, QImode, srcptr);
14851 dest = change_address (destmem, QImode, destptr);
14852 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14853 emit_label (label);
14854 LABEL_NUSES (label) = 1;
14859 rtx offset = force_reg (Pmode, const0_rtx);
14864 rtx label = ix86_expand_aligntest (count, 4, true);
14865 src = change_address (srcmem, SImode, srcptr);
14866 dest = change_address (destmem, SImode, destptr);
14867 emit_move_insn (dest, src);
14868 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14869 true, OPTAB_LIB_WIDEN);
14871 emit_move_insn (offset, tmp);
14872 emit_label (label);
14873 LABEL_NUSES (label) = 1;
14877 rtx label = ix86_expand_aligntest (count, 2, true);
14878 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14879 src = change_address (srcmem, HImode, tmp);
14880 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14881 dest = change_address (destmem, HImode, tmp);
14882 emit_move_insn (dest, src);
14883 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14884 true, OPTAB_LIB_WIDEN);
14886 emit_move_insn (offset, tmp);
14887 emit_label (label);
14888 LABEL_NUSES (label) = 1;
14892 rtx label = ix86_expand_aligntest (count, 1, true);
14893 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14894 src = change_address (srcmem, QImode, tmp);
14895 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14896 dest = change_address (destmem, QImode, tmp);
14897 emit_move_insn (dest, src);
14898 emit_label (label);
14899 LABEL_NUSES (label) = 1;
14904 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14906 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14907 rtx count, int max_size)
14910 expand_simple_binop (counter_mode (count), AND, count,
14911 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14912 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14913 gen_lowpart (QImode, value), count, QImode,
14917 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14919 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14923 if (CONST_INT_P (count))
14925 HOST_WIDE_INT countval = INTVAL (count);
14928 if ((countval & 0x10) && max_size > 16)
14932 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14933 emit_insn (gen_strset (destptr, dest, value));
14934 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14935 emit_insn (gen_strset (destptr, dest, value));
14938 gcc_unreachable ();
14941 if ((countval & 0x08) && max_size > 8)
14945 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14946 emit_insn (gen_strset (destptr, dest, value));
14950 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14951 emit_insn (gen_strset (destptr, dest, value));
14952 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14953 emit_insn (gen_strset (destptr, dest, value));
14957 if ((countval & 0x04) && max_size > 4)
14959 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14960 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14963 if ((countval & 0x02) && max_size > 2)
14965 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14966 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14969 if ((countval & 0x01) && max_size > 1)
14971 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14972 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14979 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14984 rtx label = ix86_expand_aligntest (count, 16, true);
14987 dest = change_address (destmem, DImode, destptr);
14988 emit_insn (gen_strset (destptr, dest, value));
14989 emit_insn (gen_strset (destptr, dest, value));
14993 dest = change_address (destmem, SImode, destptr);
14994 emit_insn (gen_strset (destptr, dest, value));
14995 emit_insn (gen_strset (destptr, dest, value));
14996 emit_insn (gen_strset (destptr, dest, value));
14997 emit_insn (gen_strset (destptr, dest, value));
14999 emit_label (label);
15000 LABEL_NUSES (label) = 1;
15004 rtx label = ix86_expand_aligntest (count, 8, true);
15007 dest = change_address (destmem, DImode, destptr);
15008 emit_insn (gen_strset (destptr, dest, value));
15012 dest = change_address (destmem, SImode, destptr);
15013 emit_insn (gen_strset (destptr, dest, value));
15014 emit_insn (gen_strset (destptr, dest, value));
15016 emit_label (label);
15017 LABEL_NUSES (label) = 1;
15021 rtx label = ix86_expand_aligntest (count, 4, true);
15022 dest = change_address (destmem, SImode, destptr);
15023 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15024 emit_label (label);
15025 LABEL_NUSES (label) = 1;
15029 rtx label = ix86_expand_aligntest (count, 2, true);
15030 dest = change_address (destmem, HImode, destptr);
15031 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15032 emit_label (label);
15033 LABEL_NUSES (label) = 1;
15037 rtx label = ix86_expand_aligntest (count, 1, true);
15038 dest = change_address (destmem, QImode, destptr);
15039 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15040 emit_label (label);
15041 LABEL_NUSES (label) = 1;
15045 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15046 DESIRED_ALIGNMENT. */
15048 expand_movmem_prologue (rtx destmem, rtx srcmem,
15049 rtx destptr, rtx srcptr, rtx count,
15050 int align, int desired_alignment)
15052 if (align <= 1 && desired_alignment > 1)
15054 rtx label = ix86_expand_aligntest (destptr, 1, false);
15055 srcmem = change_address (srcmem, QImode, srcptr);
15056 destmem = change_address (destmem, QImode, destptr);
15057 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15058 ix86_adjust_counter (count, 1);
15059 emit_label (label);
15060 LABEL_NUSES (label) = 1;
15062 if (align <= 2 && desired_alignment > 2)
15064 rtx label = ix86_expand_aligntest (destptr, 2, false);
15065 srcmem = change_address (srcmem, HImode, srcptr);
15066 destmem = change_address (destmem, HImode, destptr);
15067 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15068 ix86_adjust_counter (count, 2);
15069 emit_label (label);
15070 LABEL_NUSES (label) = 1;
15072 if (align <= 4 && desired_alignment > 4)
15074 rtx label = ix86_expand_aligntest (destptr, 4, false);
15075 srcmem = change_address (srcmem, SImode, srcptr);
15076 destmem = change_address (destmem, SImode, destptr);
15077 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15078 ix86_adjust_counter (count, 4);
15079 emit_label (label);
15080 LABEL_NUSES (label) = 1;
15082 gcc_assert (desired_alignment <= 8);
15085 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15086 DESIRED_ALIGNMENT. */
15088 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15089 int align, int desired_alignment)
15091 if (align <= 1 && desired_alignment > 1)
15093 rtx label = ix86_expand_aligntest (destptr, 1, false);
15094 destmem = change_address (destmem, QImode, destptr);
15095 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15096 ix86_adjust_counter (count, 1);
15097 emit_label (label);
15098 LABEL_NUSES (label) = 1;
15100 if (align <= 2 && desired_alignment > 2)
15102 rtx label = ix86_expand_aligntest (destptr, 2, false);
15103 destmem = change_address (destmem, HImode, destptr);
15104 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15105 ix86_adjust_counter (count, 2);
15106 emit_label (label);
15107 LABEL_NUSES (label) = 1;
15109 if (align <= 4 && desired_alignment > 4)
15111 rtx label = ix86_expand_aligntest (destptr, 4, false);
15112 destmem = change_address (destmem, SImode, destptr);
15113 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15114 ix86_adjust_counter (count, 4);
15115 emit_label (label);
15116 LABEL_NUSES (label) = 1;
15118 gcc_assert (desired_alignment <= 8);
15121 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15122 static enum stringop_alg
15123 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15124 int *dynamic_check)
15126 const struct stringop_algs * algs;
15127 /* Algorithms using the rep prefix want at least edi and ecx;
15128 additionally, memset wants eax and memcpy wants esi. Don't
15129 consider such algorithms if the user has appropriated those
15130 registers for their own purposes. */
15131 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15133 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15135 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15136 || (alg != rep_prefix_1_byte \
15137 && alg != rep_prefix_4_byte \
15138 && alg != rep_prefix_8_byte))
15140 *dynamic_check = -1;
15142 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15144 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15145 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15146 return stringop_alg;
15147 /* rep; movq or rep; movl is the smallest variant. */
15148 else if (optimize_size)
15150 if (!count || (count & 3))
15151 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15153 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15155 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15157 else if (expected_size != -1 && expected_size < 4)
15158 return loop_1_byte;
15159 else if (expected_size != -1)
15162 enum stringop_alg alg = libcall;
15163 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15165 /* We get here if the algorithms that were not libcall-based
15166 were rep-prefix based and we are unable to use rep prefixes
15167 based on global register usage. Break out of the loop and
15168 use the heuristic below. */
15169 if (algs->size[i].max == 0)
15171 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15173 enum stringop_alg candidate = algs->size[i].alg;
15175 if (candidate != libcall && ALG_USABLE_P (candidate))
15177 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15178 last non-libcall inline algorithm. */
15179 if (TARGET_INLINE_ALL_STRINGOPS)
15181 /* When the current size is best to be copied by a libcall,
15182 but we are still forced to inline, run the heuristic below
15183 that will pick code for medium sized blocks. */
15184 if (alg != libcall)
15188 else if (ALG_USABLE_P (candidate))
15192 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15194 /* When asked to inline the call anyway, try to pick meaningful choice.
15195 We look for maximal size of block that is faster to copy by hand and
15196 take blocks of at most of that size guessing that average size will
15197 be roughly half of the block.
15199 If this turns out to be bad, we might simply specify the preferred
15200 choice in ix86_costs. */
15201 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15202 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15205 enum stringop_alg alg;
15207 bool any_alg_usable_p = true;
15209 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15211 enum stringop_alg candidate = algs->size[i].alg;
15212 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15214 if (candidate != libcall && candidate
15215 && ALG_USABLE_P (candidate))
15216 max = algs->size[i].max;
15218 /* If there aren't any usable algorithms, then recursing on
15219 smaller sizes isn't going to find anything. Just return the
15220 simple byte-at-a-time copy loop. */
15221 if (!any_alg_usable_p)
15223 /* Pick something reasonable. */
15224 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15225 *dynamic_check = 128;
15226 return loop_1_byte;
15230 alg = decide_alg (count, max / 2, memset, dynamic_check);
15231 gcc_assert (*dynamic_check == -1);
15232 gcc_assert (alg != libcall);
15233 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15234 *dynamic_check = max;
15237 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15238 #undef ALG_USABLE_P
15241 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15242 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15244 decide_alignment (int align,
15245 enum stringop_alg alg,
15248 int desired_align = 0;
15252 gcc_unreachable ();
15254 case unrolled_loop:
15255 desired_align = GET_MODE_SIZE (Pmode);
15257 case rep_prefix_8_byte:
15260 case rep_prefix_4_byte:
15261 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15262 copying whole cacheline at once. */
15263 if (TARGET_PENTIUMPRO)
15268 case rep_prefix_1_byte:
15269 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15270 copying whole cacheline at once. */
15271 if (TARGET_PENTIUMPRO)
15285 if (desired_align < align)
15286 desired_align = align;
15287 if (expected_size != -1 && expected_size < 4)
15288 desired_align = align;
15289 return desired_align;
15292 /* Return the smallest power of 2 greater than VAL. */
15294 smallest_pow2_greater_than (int val)
15302 /* Expand string move (memcpy) operation. Use i386 string operations when
15303 profitable. expand_setmem contains similar code. The code depends upon
15304 architecture, block size and alignment, but always has the same
15307 1) Prologue guard: Conditional that jumps up to epilogues for small
15308 blocks that can be handled by epilogue alone. This is faster but
15309 also needed for correctness, since prologue assume the block is larger
15310 than the desired alignment.
15312 Optional dynamic check for size and libcall for large
15313 blocks is emitted here too, with -minline-stringops-dynamically.
15315 2) Prologue: copy first few bytes in order to get destination aligned
15316 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15317 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15318 We emit either a jump tree on power of two sized blocks, or a byte loop.
15320 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15321 with specified algorithm.
15323 4) Epilogue: code copying tail of the block that is too small to be
15324 handled by main body (or up to size guarded by prologue guard). */
15327 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15328 rtx expected_align_exp, rtx expected_size_exp)
15334 rtx jump_around_label = NULL;
15335 HOST_WIDE_INT align = 1;
15336 unsigned HOST_WIDE_INT count = 0;
15337 HOST_WIDE_INT expected_size = -1;
15338 int size_needed = 0, epilogue_size_needed;
15339 int desired_align = 0;
15340 enum stringop_alg alg;
15343 if (CONST_INT_P (align_exp))
15344 align = INTVAL (align_exp);
15345 /* i386 can do misaligned access on reasonably increased cost. */
15346 if (CONST_INT_P (expected_align_exp)
15347 && INTVAL (expected_align_exp) > align)
15348 align = INTVAL (expected_align_exp);
15349 if (CONST_INT_P (count_exp))
15350 count = expected_size = INTVAL (count_exp);
15351 if (CONST_INT_P (expected_size_exp) && count == 0)
15352 expected_size = INTVAL (expected_size_exp);
15354 /* Make sure we don't need to care about overflow later on. */
15355 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15358 /* Step 0: Decide on preferred algorithm, desired alignment and
15359 size of chunks to be copied by main loop. */
15361 alg = decide_alg (count, expected_size, false, &dynamic_check);
15362 desired_align = decide_alignment (align, alg, expected_size);
15364 if (!TARGET_ALIGN_STRINGOPS)
15365 align = desired_align;
15367 if (alg == libcall)
15369 gcc_assert (alg != no_stringop);
15371 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15372 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15373 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15378 gcc_unreachable ();
15380 size_needed = GET_MODE_SIZE (Pmode);
15382 case unrolled_loop:
15383 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15385 case rep_prefix_8_byte:
15388 case rep_prefix_4_byte:
15391 case rep_prefix_1_byte:
15397 epilogue_size_needed = size_needed;
15399 /* Step 1: Prologue guard. */
15401 /* Alignment code needs count to be in register. */
15402 if (CONST_INT_P (count_exp) && desired_align > align)
15403 count_exp = force_reg (counter_mode (count_exp), count_exp);
15404 gcc_assert (desired_align >= 1 && align >= 1);
15406 /* Ensure that alignment prologue won't copy past end of block. */
15407 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15409 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15410 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15411 Make sure it is power of 2. */
15412 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15414 if (CONST_INT_P (count_exp))
15416 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15421 label = gen_label_rtx ();
15422 emit_cmp_and_jump_insns (count_exp,
15423 GEN_INT (epilogue_size_needed),
15424 LTU, 0, counter_mode (count_exp), 1, label);
15425 if (expected_size == -1 || expected_size < epilogue_size_needed)
15426 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15428 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15432 /* Emit code to decide on runtime whether library call or inline should be
15434 if (dynamic_check != -1)
15436 if (CONST_INT_P (count_exp))
15438 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15440 emit_block_move_via_libcall (dst, src, count_exp, false);
15441 count_exp = const0_rtx;
15447 rtx hot_label = gen_label_rtx ();
15448 jump_around_label = gen_label_rtx ();
15449 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15450 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15451 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15452 emit_block_move_via_libcall (dst, src, count_exp, false);
15453 emit_jump (jump_around_label);
15454 emit_label (hot_label);
15458 /* Step 2: Alignment prologue. */
15460 if (desired_align > align)
15462 /* Except for the first move in epilogue, we no longer know
15463 constant offset in aliasing info. It don't seems to worth
15464 the pain to maintain it for the first move, so throw away
15466 src = change_address (src, BLKmode, srcreg);
15467 dst = change_address (dst, BLKmode, destreg);
15468 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15471 if (label && size_needed == 1)
15473 emit_label (label);
15474 LABEL_NUSES (label) = 1;
15478 /* Step 3: Main loop. */
15484 gcc_unreachable ();
15486 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15487 count_exp, QImode, 1, expected_size);
15490 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15491 count_exp, Pmode, 1, expected_size);
15493 case unrolled_loop:
15494 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15495 registers for 4 temporaries anyway. */
15496 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15497 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15500 case rep_prefix_8_byte:
15501 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15504 case rep_prefix_4_byte:
15505 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15508 case rep_prefix_1_byte:
15509 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15513 /* Adjust properly the offset of src and dest memory for aliasing. */
15514 if (CONST_INT_P (count_exp))
15516 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15517 (count / size_needed) * size_needed);
15518 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15519 (count / size_needed) * size_needed);
15523 src = change_address (src, BLKmode, srcreg);
15524 dst = change_address (dst, BLKmode, destreg);
15527 /* Step 4: Epilogue to copy the remaining bytes. */
15531 /* When the main loop is done, COUNT_EXP might hold original count,
15532 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15533 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15534 bytes. Compensate if needed. */
15536 if (size_needed < epilogue_size_needed)
15539 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15540 GEN_INT (size_needed - 1), count_exp, 1,
15542 if (tmp != count_exp)
15543 emit_move_insn (count_exp, tmp);
15545 emit_label (label);
15546 LABEL_NUSES (label) = 1;
15549 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15550 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15551 epilogue_size_needed);
15552 if (jump_around_label)
15553 emit_label (jump_around_label);
15557 /* Helper function for memcpy. For QImode value 0xXY produce
15558 0xXYXYXYXY of wide specified by MODE. This is essentially
15559 a * 0x10101010, but we can do slightly better than
15560 synth_mult by unwinding the sequence by hand on CPUs with
15563 promote_duplicated_reg (enum machine_mode mode, rtx val)
15565 enum machine_mode valmode = GET_MODE (val);
15567 int nops = mode == DImode ? 3 : 2;
15569 gcc_assert (mode == SImode || mode == DImode);
15570 if (val == const0_rtx)
15571 return copy_to_mode_reg (mode, const0_rtx);
15572 if (CONST_INT_P (val))
15574 HOST_WIDE_INT v = INTVAL (val) & 255;
15578 if (mode == DImode)
15579 v |= (v << 16) << 16;
15580 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15583 if (valmode == VOIDmode)
15585 if (valmode != QImode)
15586 val = gen_lowpart (QImode, val);
15587 if (mode == QImode)
15589 if (!TARGET_PARTIAL_REG_STALL)
15591 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15592 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15593 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15594 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15596 rtx reg = convert_modes (mode, QImode, val, true);
15597 tmp = promote_duplicated_reg (mode, const1_rtx);
15598 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15603 rtx reg = convert_modes (mode, QImode, val, true);
15605 if (!TARGET_PARTIAL_REG_STALL)
15606 if (mode == SImode)
15607 emit_insn (gen_movsi_insv_1 (reg, reg));
15609 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15612 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15613 NULL, 1, OPTAB_DIRECT);
15615 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15617 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15618 NULL, 1, OPTAB_DIRECT);
15619 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15620 if (mode == SImode)
15622 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15623 NULL, 1, OPTAB_DIRECT);
15624 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15629 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15630 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15631 alignment from ALIGN to DESIRED_ALIGN. */
15633 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15638 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15639 promoted_val = promote_duplicated_reg (DImode, val);
15640 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15641 promoted_val = promote_duplicated_reg (SImode, val);
15642 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15643 promoted_val = promote_duplicated_reg (HImode, val);
15645 promoted_val = val;
15647 return promoted_val;
15650 /* Expand string clear operation (bzero). Use i386 string operations when
15651 profitable. See expand_movmem comment for explanation of individual
15652 steps performed. */
15654 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15655 rtx expected_align_exp, rtx expected_size_exp)
15660 rtx jump_around_label = NULL;
15661 HOST_WIDE_INT align = 1;
15662 unsigned HOST_WIDE_INT count = 0;
15663 HOST_WIDE_INT expected_size = -1;
15664 int size_needed = 0, epilogue_size_needed;
15665 int desired_align = 0;
15666 enum stringop_alg alg;
15667 rtx promoted_val = NULL;
15668 bool force_loopy_epilogue = false;
15671 if (CONST_INT_P (align_exp))
15672 align = INTVAL (align_exp);
15673 /* i386 can do misaligned access on reasonably increased cost. */
15674 if (CONST_INT_P (expected_align_exp)
15675 && INTVAL (expected_align_exp) > align)
15676 align = INTVAL (expected_align_exp);
15677 if (CONST_INT_P (count_exp))
15678 count = expected_size = INTVAL (count_exp);
15679 if (CONST_INT_P (expected_size_exp) && count == 0)
15680 expected_size = INTVAL (expected_size_exp);
15682 /* Make sure we don't need to care about overflow later on. */
15683 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15686 /* Step 0: Decide on preferred algorithm, desired alignment and
15687 size of chunks to be copied by main loop. */
15689 alg = decide_alg (count, expected_size, true, &dynamic_check);
15690 desired_align = decide_alignment (align, alg, expected_size);
15692 if (!TARGET_ALIGN_STRINGOPS)
15693 align = desired_align;
15695 if (alg == libcall)
15697 gcc_assert (alg != no_stringop);
15699 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15700 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15705 gcc_unreachable ();
15707 size_needed = GET_MODE_SIZE (Pmode);
15709 case unrolled_loop:
15710 size_needed = GET_MODE_SIZE (Pmode) * 4;
15712 case rep_prefix_8_byte:
15715 case rep_prefix_4_byte:
15718 case rep_prefix_1_byte:
15723 epilogue_size_needed = size_needed;
15725 /* Step 1: Prologue guard. */
15727 /* Alignment code needs count to be in register. */
15728 if (CONST_INT_P (count_exp) && desired_align > align)
15730 enum machine_mode mode = SImode;
15731 if (TARGET_64BIT && (count & ~0xffffffff))
15733 count_exp = force_reg (mode, count_exp);
15735 /* Do the cheap promotion to allow better CSE across the
15736 main loop and epilogue (ie one load of the big constant in the
15737 front of all code. */
15738 if (CONST_INT_P (val_exp))
15739 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15740 desired_align, align);
15741 /* Ensure that alignment prologue won't copy past end of block. */
15742 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15744 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15745 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15746 Make sure it is power of 2. */
15747 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15749 /* To improve performance of small blocks, we jump around the VAL
15750 promoting mode. This mean that if the promoted VAL is not constant,
15751 we might not use it in the epilogue and have to use byte
15753 if (epilogue_size_needed > 2 && !promoted_val)
15754 force_loopy_epilogue = true;
15755 label = gen_label_rtx ();
15756 emit_cmp_and_jump_insns (count_exp,
15757 GEN_INT (epilogue_size_needed),
15758 LTU, 0, counter_mode (count_exp), 1, label);
15759 if (GET_CODE (count_exp) == CONST_INT)
15761 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15762 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15764 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15766 if (dynamic_check != -1)
15768 rtx hot_label = gen_label_rtx ();
15769 jump_around_label = gen_label_rtx ();
15770 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15771 LEU, 0, counter_mode (count_exp), 1, hot_label);
15772 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15773 set_storage_via_libcall (dst, count_exp, val_exp, false);
15774 emit_jump (jump_around_label);
15775 emit_label (hot_label);
15778 /* Step 2: Alignment prologue. */
15780 /* Do the expensive promotion once we branched off the small blocks. */
15782 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15783 desired_align, align);
15784 gcc_assert (desired_align >= 1 && align >= 1);
15786 if (desired_align > align)
15788 /* Except for the first move in epilogue, we no longer know
15789 constant offset in aliasing info. It don't seems to worth
15790 the pain to maintain it for the first move, so throw away
15792 dst = change_address (dst, BLKmode, destreg);
15793 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15796 if (label && size_needed == 1)
15798 emit_label (label);
15799 LABEL_NUSES (label) = 1;
15803 /* Step 3: Main loop. */
15809 gcc_unreachable ();
15811 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15812 count_exp, QImode, 1, expected_size);
15815 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15816 count_exp, Pmode, 1, expected_size);
15818 case unrolled_loop:
15819 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15820 count_exp, Pmode, 4, expected_size);
15822 case rep_prefix_8_byte:
15823 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15826 case rep_prefix_4_byte:
15827 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15830 case rep_prefix_1_byte:
15831 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15835 /* Adjust properly the offset of src and dest memory for aliasing. */
15836 if (CONST_INT_P (count_exp))
15837 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15838 (count / size_needed) * size_needed);
15840 dst = change_address (dst, BLKmode, destreg);
15842 /* Step 4: Epilogue to copy the remaining bytes. */
15846 /* When the main loop is done, COUNT_EXP might hold original count,
15847 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15848 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15849 bytes. Compensate if needed. */
15851 if (size_needed < desired_align - align)
15854 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15855 GEN_INT (size_needed - 1), count_exp, 1,
15857 size_needed = desired_align - align + 1;
15858 if (tmp != count_exp)
15859 emit_move_insn (count_exp, tmp);
15861 emit_label (label);
15862 LABEL_NUSES (label) = 1;
15864 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15866 if (force_loopy_epilogue)
15867 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15870 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15873 if (jump_around_label)
15874 emit_label (jump_around_label);
15878 /* Expand the appropriate insns for doing strlen if not just doing
15881 out = result, initialized with the start address
15882 align_rtx = alignment of the address.
15883 scratch = scratch register, initialized with the startaddress when
15884 not aligned, otherwise undefined
15886 This is just the body. It needs the initializations mentioned above and
15887 some address computing at the end. These things are done in i386.md. */
15890 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15894 rtx align_2_label = NULL_RTX;
15895 rtx align_3_label = NULL_RTX;
15896 rtx align_4_label = gen_label_rtx ();
15897 rtx end_0_label = gen_label_rtx ();
15899 rtx tmpreg = gen_reg_rtx (SImode);
15900 rtx scratch = gen_reg_rtx (SImode);
15904 if (CONST_INT_P (align_rtx))
15905 align = INTVAL (align_rtx);
15907 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15909 /* Is there a known alignment and is it less than 4? */
15912 rtx scratch1 = gen_reg_rtx (Pmode);
15913 emit_move_insn (scratch1, out);
15914 /* Is there a known alignment and is it not 2? */
15917 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15918 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15920 /* Leave just the 3 lower bits. */
15921 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15922 NULL_RTX, 0, OPTAB_WIDEN);
15924 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15925 Pmode, 1, align_4_label);
15926 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15927 Pmode, 1, align_2_label);
15928 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15929 Pmode, 1, align_3_label);
15933 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15934 check if is aligned to 4 - byte. */
15936 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15937 NULL_RTX, 0, OPTAB_WIDEN);
15939 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15940 Pmode, 1, align_4_label);
15943 mem = change_address (src, QImode, out);
15945 /* Now compare the bytes. */
15947 /* Compare the first n unaligned byte on a byte per byte basis. */
15948 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15949 QImode, 1, end_0_label);
15951 /* Increment the address. */
15953 emit_insn (gen_adddi3 (out, out, const1_rtx));
15955 emit_insn (gen_addsi3 (out, out, const1_rtx));
15957 /* Not needed with an alignment of 2 */
15960 emit_label (align_2_label);
15962 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15966 emit_insn (gen_adddi3 (out, out, const1_rtx));
15968 emit_insn (gen_addsi3 (out, out, const1_rtx));
15970 emit_label (align_3_label);
15973 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15977 emit_insn (gen_adddi3 (out, out, const1_rtx));
15979 emit_insn (gen_addsi3 (out, out, const1_rtx));
15982 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15983 align this loop. It gives only huge programs, but does not help to
15985 emit_label (align_4_label);
15987 mem = change_address (src, SImode, out);
15988 emit_move_insn (scratch, mem);
15990 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15992 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15994 /* This formula yields a nonzero result iff one of the bytes is zero.
15995 This saves three branches inside loop and many cycles. */
15997 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15998 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15999 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16000 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16001 gen_int_mode (0x80808080, SImode)));
16002 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16007 rtx reg = gen_reg_rtx (SImode);
16008 rtx reg2 = gen_reg_rtx (Pmode);
16009 emit_move_insn (reg, tmpreg);
16010 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16012 /* If zero is not in the first two bytes, move two bytes forward. */
16013 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16014 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16015 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16016 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16017 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16020 /* Emit lea manually to avoid clobbering of flags. */
16021 emit_insn (gen_rtx_SET (SImode, reg2,
16022 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16024 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16025 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16026 emit_insn (gen_rtx_SET (VOIDmode, out,
16027 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16034 rtx end_2_label = gen_label_rtx ();
16035 /* Is zero in the first two bytes? */
16037 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16038 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16039 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16040 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16041 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16043 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16044 JUMP_LABEL (tmp) = end_2_label;
16046 /* Not in the first two. Move two bytes forward. */
16047 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16049 emit_insn (gen_adddi3 (out, out, const2_rtx));
16051 emit_insn (gen_addsi3 (out, out, const2_rtx));
16053 emit_label (end_2_label);
16057 /* Avoid branch in fixing the byte. */
16058 tmpreg = gen_lowpart (QImode, tmpreg);
16059 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16060 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16062 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16064 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16066 emit_label (end_0_label);
16069 /* Expand strlen. */
16072 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16074 rtx addr, scratch1, scratch2, scratch3, scratch4;
16076 /* The generic case of strlen expander is long. Avoid it's
16077 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16079 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16080 && !TARGET_INLINE_ALL_STRINGOPS
16082 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16085 addr = force_reg (Pmode, XEXP (src, 0));
16086 scratch1 = gen_reg_rtx (Pmode);
16088 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16091 /* Well it seems that some optimizer does not combine a call like
16092 foo(strlen(bar), strlen(bar));
16093 when the move and the subtraction is done here. It does calculate
16094 the length just once when these instructions are done inside of
16095 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16096 often used and I use one fewer register for the lifetime of
16097 output_strlen_unroll() this is better. */
16099 emit_move_insn (out, addr);
16101 ix86_expand_strlensi_unroll_1 (out, src, align);
16103 /* strlensi_unroll_1 returns the address of the zero at the end of
16104 the string, like memchr(), so compute the length by subtracting
16105 the start address. */
16107 emit_insn (gen_subdi3 (out, out, addr));
16109 emit_insn (gen_subsi3 (out, out, addr));
16115 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16116 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16119 scratch2 = gen_reg_rtx (Pmode);
16120 scratch3 = gen_reg_rtx (Pmode);
16121 scratch4 = force_reg (Pmode, constm1_rtx);
16123 emit_move_insn (scratch3, addr);
16124 eoschar = force_reg (QImode, eoschar);
16126 src = replace_equiv_address_nv (src, scratch3);
16128 /* If .md starts supporting :P, this can be done in .md. */
16129 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16130 scratch4), UNSPEC_SCAS);
16131 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16134 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16135 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16139 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16140 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16146 /* For given symbol (function) construct code to compute address of it's PLT
16147 entry in large x86-64 PIC model. */
16149 construct_plt_address (rtx symbol)
16151 rtx tmp = gen_reg_rtx (Pmode);
16152 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16154 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16155 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16157 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16158 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16163 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16164 rtx callarg2 ATTRIBUTE_UNUSED,
16165 rtx pop, int sibcall)
16167 rtx use = NULL, call;
16169 if (pop == const0_rtx)
16171 gcc_assert (!TARGET_64BIT || !pop);
16173 if (TARGET_MACHO && !TARGET_64BIT)
16176 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16177 fnaddr = machopic_indirect_call_target (fnaddr);
16182 /* Static functions and indirect calls don't need the pic register. */
16183 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16184 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16185 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16186 use_reg (&use, pic_offset_table_rtx);
16189 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16191 rtx al = gen_rtx_REG (QImode, AX_REG);
16192 emit_move_insn (al, callarg2);
16193 use_reg (&use, al);
16196 if (ix86_cmodel == CM_LARGE_PIC
16197 && GET_CODE (fnaddr) == MEM
16198 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16199 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16200 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16201 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16203 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16204 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16206 if (sibcall && TARGET_64BIT
16207 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16210 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16211 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16212 emit_move_insn (fnaddr, addr);
16213 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16216 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16218 call = gen_rtx_SET (VOIDmode, retval, call);
16221 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16222 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16223 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16226 call = emit_call_insn (call);
16228 CALL_INSN_FUNCTION_USAGE (call) = use;
16232 /* Clear stack slot assignments remembered from previous functions.
16233 This is called from INIT_EXPANDERS once before RTL is emitted for each
16236 static struct machine_function *
16237 ix86_init_machine_status (void)
16239 struct machine_function *f;
16241 f = GGC_CNEW (struct machine_function);
16242 f->use_fast_prologue_epilogue_nregs = -1;
16243 f->tls_descriptor_call_expanded_p = 0;
16248 /* Return a MEM corresponding to a stack slot with mode MODE.
16249 Allocate a new slot if necessary.
16251 The RTL for a function can have several slots available: N is
16252 which slot to use. */
16255 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16257 struct stack_local_entry *s;
16259 gcc_assert (n < MAX_386_STACK_LOCALS);
16261 /* Virtual slot is valid only before vregs are instantiated. */
16262 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16264 for (s = ix86_stack_locals; s; s = s->next)
16265 if (s->mode == mode && s->n == n)
16266 return copy_rtx (s->rtl);
16268 s = (struct stack_local_entry *)
16269 ggc_alloc (sizeof (struct stack_local_entry));
16272 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16274 s->next = ix86_stack_locals;
16275 ix86_stack_locals = s;
16279 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16281 static GTY(()) rtx ix86_tls_symbol;
16283 ix86_tls_get_addr (void)
16286 if (!ix86_tls_symbol)
16288 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16289 (TARGET_ANY_GNU_TLS
16291 ? "___tls_get_addr"
16292 : "__tls_get_addr");
16295 return ix86_tls_symbol;
16298 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16300 static GTY(()) rtx ix86_tls_module_base_symbol;
16302 ix86_tls_module_base (void)
16305 if (!ix86_tls_module_base_symbol)
16307 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16308 "_TLS_MODULE_BASE_");
16309 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16310 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16313 return ix86_tls_module_base_symbol;
16316 /* Calculate the length of the memory address in the instruction
16317 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16320 memory_address_length (rtx addr)
16322 struct ix86_address parts;
16323 rtx base, index, disp;
16327 if (GET_CODE (addr) == PRE_DEC
16328 || GET_CODE (addr) == POST_INC
16329 || GET_CODE (addr) == PRE_MODIFY
16330 || GET_CODE (addr) == POST_MODIFY)
16333 ok = ix86_decompose_address (addr, &parts);
16336 if (parts.base && GET_CODE (parts.base) == SUBREG)
16337 parts.base = SUBREG_REG (parts.base);
16338 if (parts.index && GET_CODE (parts.index) == SUBREG)
16339 parts.index = SUBREG_REG (parts.index);
16342 index = parts.index;
16347 - esp as the base always wants an index,
16348 - ebp as the base always wants a displacement. */
16350 /* Register Indirect. */
16351 if (base && !index && !disp)
16353 /* esp (for its index) and ebp (for its displacement) need
16354 the two-byte modrm form. */
16355 if (addr == stack_pointer_rtx
16356 || addr == arg_pointer_rtx
16357 || addr == frame_pointer_rtx
16358 || addr == hard_frame_pointer_rtx)
16362 /* Direct Addressing. */
16363 else if (disp && !base && !index)
16368 /* Find the length of the displacement constant. */
16371 if (base && satisfies_constraint_K (disp))
16376 /* ebp always wants a displacement. */
16377 else if (base == hard_frame_pointer_rtx)
16380 /* An index requires the two-byte modrm form.... */
16382 /* ...like esp, which always wants an index. */
16383 || base == stack_pointer_rtx
16384 || base == arg_pointer_rtx
16385 || base == frame_pointer_rtx)
16392 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16393 is set, expect that insn have 8bit immediate alternative. */
16395 ix86_attr_length_immediate_default (rtx insn, int shortform)
16399 extract_insn_cached (insn);
16400 for (i = recog_data.n_operands - 1; i >= 0; --i)
16401 if (CONSTANT_P (recog_data.operand[i]))
16404 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16408 switch (get_attr_mode (insn))
16419 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16424 fatal_insn ("unknown insn mode", insn);
16430 /* Compute default value for "length_address" attribute. */
16432 ix86_attr_length_address_default (rtx insn)
16436 if (get_attr_type (insn) == TYPE_LEA)
16438 rtx set = PATTERN (insn);
16440 if (GET_CODE (set) == PARALLEL)
16441 set = XVECEXP (set, 0, 0);
16443 gcc_assert (GET_CODE (set) == SET);
16445 return memory_address_length (SET_SRC (set));
16448 extract_insn_cached (insn);
16449 for (i = recog_data.n_operands - 1; i >= 0; --i)
16450 if (MEM_P (recog_data.operand[i]))
16452 return memory_address_length (XEXP (recog_data.operand[i], 0));
16458 /* Return the maximum number of instructions a cpu can issue. */
16461 ix86_issue_rate (void)
16465 case PROCESSOR_PENTIUM:
16469 case PROCESSOR_PENTIUMPRO:
16470 case PROCESSOR_PENTIUM4:
16471 case PROCESSOR_ATHLON:
16473 case PROCESSOR_AMDFAM10:
16474 case PROCESSOR_NOCONA:
16475 case PROCESSOR_GENERIC32:
16476 case PROCESSOR_GENERIC64:
16479 case PROCESSOR_CORE2:
16487 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16488 by DEP_INSN and nothing set by DEP_INSN. */
16491 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16495 /* Simplify the test for uninteresting insns. */
16496 if (insn_type != TYPE_SETCC
16497 && insn_type != TYPE_ICMOV
16498 && insn_type != TYPE_FCMOV
16499 && insn_type != TYPE_IBR)
16502 if ((set = single_set (dep_insn)) != 0)
16504 set = SET_DEST (set);
16507 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16508 && XVECLEN (PATTERN (dep_insn), 0) == 2
16509 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16510 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16512 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16513 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16518 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16521 /* This test is true if the dependent insn reads the flags but
16522 not any other potentially set register. */
16523 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16526 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16532 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16533 address with operands set by DEP_INSN. */
16536 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16540 if (insn_type == TYPE_LEA
16543 addr = PATTERN (insn);
16545 if (GET_CODE (addr) == PARALLEL)
16546 addr = XVECEXP (addr, 0, 0);
16548 gcc_assert (GET_CODE (addr) == SET);
16550 addr = SET_SRC (addr);
16555 extract_insn_cached (insn);
16556 for (i = recog_data.n_operands - 1; i >= 0; --i)
16557 if (MEM_P (recog_data.operand[i]))
16559 addr = XEXP (recog_data.operand[i], 0);
16566 return modified_in_p (addr, dep_insn);
16570 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16572 enum attr_type insn_type, dep_insn_type;
16573 enum attr_memory memory;
16575 int dep_insn_code_number;
16577 /* Anti and output dependencies have zero cost on all CPUs. */
16578 if (REG_NOTE_KIND (link) != 0)
16581 dep_insn_code_number = recog_memoized (dep_insn);
16583 /* If we can't recognize the insns, we can't really do anything. */
16584 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16587 insn_type = get_attr_type (insn);
16588 dep_insn_type = get_attr_type (dep_insn);
16592 case PROCESSOR_PENTIUM:
16593 /* Address Generation Interlock adds a cycle of latency. */
16594 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16597 /* ??? Compares pair with jump/setcc. */
16598 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16601 /* Floating point stores require value to be ready one cycle earlier. */
16602 if (insn_type == TYPE_FMOV
16603 && get_attr_memory (insn) == MEMORY_STORE
16604 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16608 case PROCESSOR_PENTIUMPRO:
16609 memory = get_attr_memory (insn);
16611 /* INT->FP conversion is expensive. */
16612 if (get_attr_fp_int_src (dep_insn))
16615 /* There is one cycle extra latency between an FP op and a store. */
16616 if (insn_type == TYPE_FMOV
16617 && (set = single_set (dep_insn)) != NULL_RTX
16618 && (set2 = single_set (insn)) != NULL_RTX
16619 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16620 && MEM_P (SET_DEST (set2)))
16623 /* Show ability of reorder buffer to hide latency of load by executing
16624 in parallel with previous instruction in case
16625 previous instruction is not needed to compute the address. */
16626 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16627 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16629 /* Claim moves to take one cycle, as core can issue one load
16630 at time and the next load can start cycle later. */
16631 if (dep_insn_type == TYPE_IMOV
16632 || dep_insn_type == TYPE_FMOV)
16640 memory = get_attr_memory (insn);
16642 /* The esp dependency is resolved before the instruction is really
16644 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16645 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16648 /* INT->FP conversion is expensive. */
16649 if (get_attr_fp_int_src (dep_insn))
16652 /* Show ability of reorder buffer to hide latency of load by executing
16653 in parallel with previous instruction in case
16654 previous instruction is not needed to compute the address. */
16655 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16656 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16658 /* Claim moves to take one cycle, as core can issue one load
16659 at time and the next load can start cycle later. */
16660 if (dep_insn_type == TYPE_IMOV
16661 || dep_insn_type == TYPE_FMOV)
16670 case PROCESSOR_ATHLON:
16672 case PROCESSOR_AMDFAM10:
16673 case PROCESSOR_GENERIC32:
16674 case PROCESSOR_GENERIC64:
16675 memory = get_attr_memory (insn);
16677 /* Show ability of reorder buffer to hide latency of load by executing
16678 in parallel with previous instruction in case
16679 previous instruction is not needed to compute the address. */
16680 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16681 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16683 enum attr_unit unit = get_attr_unit (insn);
16686 /* Because of the difference between the length of integer and
16687 floating unit pipeline preparation stages, the memory operands
16688 for floating point are cheaper.
16690 ??? For Athlon it the difference is most probably 2. */
16691 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16694 loadcost = TARGET_ATHLON ? 2 : 0;
16696 if (cost >= loadcost)
16709 /* How many alternative schedules to try. This should be as wide as the
16710 scheduling freedom in the DFA, but no wider. Making this value too
16711 large results extra work for the scheduler. */
16714 ia32_multipass_dfa_lookahead (void)
16718 case PROCESSOR_PENTIUM:
16721 case PROCESSOR_PENTIUMPRO:
16731 /* Compute the alignment given to a constant that is being placed in memory.
16732 EXP is the constant and ALIGN is the alignment that the object would
16734 The value of this function is used instead of that alignment to align
16738 ix86_constant_alignment (tree exp, int align)
16740 if (TREE_CODE (exp) == REAL_CST)
16742 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16744 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16747 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16748 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16749 return BITS_PER_WORD;
16754 /* Compute the alignment for a static variable.
16755 TYPE is the data type, and ALIGN is the alignment that
16756 the object would ordinarily have. The value of this function is used
16757 instead of that alignment to align the object. */
16760 ix86_data_alignment (tree type, int align)
16762 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16764 if (AGGREGATE_TYPE_P (type)
16765 && TYPE_SIZE (type)
16766 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16767 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16768 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16769 && align < max_align)
16772 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16773 to 16byte boundary. */
16776 if (AGGREGATE_TYPE_P (type)
16777 && TYPE_SIZE (type)
16778 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16779 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16780 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16784 if (TREE_CODE (type) == ARRAY_TYPE)
16786 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16788 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16791 else if (TREE_CODE (type) == COMPLEX_TYPE)
16794 if (TYPE_MODE (type) == DCmode && align < 64)
16796 if (TYPE_MODE (type) == XCmode && align < 128)
16799 else if ((TREE_CODE (type) == RECORD_TYPE
16800 || TREE_CODE (type) == UNION_TYPE
16801 || TREE_CODE (type) == QUAL_UNION_TYPE)
16802 && TYPE_FIELDS (type))
16804 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16806 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16809 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16810 || TREE_CODE (type) == INTEGER_TYPE)
16812 if (TYPE_MODE (type) == DFmode && align < 64)
16814 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16821 /* Compute the alignment for a local variable.
16822 TYPE is the data type, and ALIGN is the alignment that
16823 the object would ordinarily have. The value of this macro is used
16824 instead of that alignment to align the object. */
16827 ix86_local_alignment (tree type, int align)
16829 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16830 to 16byte boundary. */
16833 if (AGGREGATE_TYPE_P (type)
16834 && TYPE_SIZE (type)
16835 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16836 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16837 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16840 if (TREE_CODE (type) == ARRAY_TYPE)
16842 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16844 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16847 else if (TREE_CODE (type) == COMPLEX_TYPE)
16849 if (TYPE_MODE (type) == DCmode && align < 64)
16851 if (TYPE_MODE (type) == XCmode && align < 128)
16854 else if ((TREE_CODE (type) == RECORD_TYPE
16855 || TREE_CODE (type) == UNION_TYPE
16856 || TREE_CODE (type) == QUAL_UNION_TYPE)
16857 && TYPE_FIELDS (type))
16859 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16861 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16864 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16865 || TREE_CODE (type) == INTEGER_TYPE)
16868 if (TYPE_MODE (type) == DFmode && align < 64)
16870 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16876 /* Emit RTL insns to initialize the variable parts of a trampoline.
16877 FNADDR is an RTX for the address of the function's pure code.
16878 CXT is an RTX for the static chain value for the function. */
16880 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16884 /* Compute offset from the end of the jmp to the target function. */
16885 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16886 plus_constant (tramp, 10),
16887 NULL_RTX, 1, OPTAB_DIRECT);
16888 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16889 gen_int_mode (0xb9, QImode));
16890 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16891 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16892 gen_int_mode (0xe9, QImode));
16893 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16898 /* Try to load address using shorter movl instead of movabs.
16899 We may want to support movq for kernel mode, but kernel does not use
16900 trampolines at the moment. */
16901 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16903 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16904 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16905 gen_int_mode (0xbb41, HImode));
16906 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16907 gen_lowpart (SImode, fnaddr));
16912 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16913 gen_int_mode (0xbb49, HImode));
16914 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16918 /* Load static chain using movabs to r10. */
16919 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16920 gen_int_mode (0xba49, HImode));
16921 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16924 /* Jump to the r11 */
16925 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16926 gen_int_mode (0xff49, HImode));
16927 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16928 gen_int_mode (0xe3, QImode));
16930 gcc_assert (offset <= TRAMPOLINE_SIZE);
16933 #ifdef ENABLE_EXECUTE_STACK
16934 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16935 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16939 /* Codes for all the SSE/MMX builtins. */
16942 IX86_BUILTIN_ADDPS,
16943 IX86_BUILTIN_ADDSS,
16944 IX86_BUILTIN_DIVPS,
16945 IX86_BUILTIN_DIVSS,
16946 IX86_BUILTIN_MULPS,
16947 IX86_BUILTIN_MULSS,
16948 IX86_BUILTIN_SUBPS,
16949 IX86_BUILTIN_SUBSS,
16951 IX86_BUILTIN_CMPEQPS,
16952 IX86_BUILTIN_CMPLTPS,
16953 IX86_BUILTIN_CMPLEPS,
16954 IX86_BUILTIN_CMPGTPS,
16955 IX86_BUILTIN_CMPGEPS,
16956 IX86_BUILTIN_CMPNEQPS,
16957 IX86_BUILTIN_CMPNLTPS,
16958 IX86_BUILTIN_CMPNLEPS,
16959 IX86_BUILTIN_CMPNGTPS,
16960 IX86_BUILTIN_CMPNGEPS,
16961 IX86_BUILTIN_CMPORDPS,
16962 IX86_BUILTIN_CMPUNORDPS,
16963 IX86_BUILTIN_CMPEQSS,
16964 IX86_BUILTIN_CMPLTSS,
16965 IX86_BUILTIN_CMPLESS,
16966 IX86_BUILTIN_CMPNEQSS,
16967 IX86_BUILTIN_CMPNLTSS,
16968 IX86_BUILTIN_CMPNLESS,
16969 IX86_BUILTIN_CMPNGTSS,
16970 IX86_BUILTIN_CMPNGESS,
16971 IX86_BUILTIN_CMPORDSS,
16972 IX86_BUILTIN_CMPUNORDSS,
16974 IX86_BUILTIN_COMIEQSS,
16975 IX86_BUILTIN_COMILTSS,
16976 IX86_BUILTIN_COMILESS,
16977 IX86_BUILTIN_COMIGTSS,
16978 IX86_BUILTIN_COMIGESS,
16979 IX86_BUILTIN_COMINEQSS,
16980 IX86_BUILTIN_UCOMIEQSS,
16981 IX86_BUILTIN_UCOMILTSS,
16982 IX86_BUILTIN_UCOMILESS,
16983 IX86_BUILTIN_UCOMIGTSS,
16984 IX86_BUILTIN_UCOMIGESS,
16985 IX86_BUILTIN_UCOMINEQSS,
16987 IX86_BUILTIN_CVTPI2PS,
16988 IX86_BUILTIN_CVTPS2PI,
16989 IX86_BUILTIN_CVTSI2SS,
16990 IX86_BUILTIN_CVTSI642SS,
16991 IX86_BUILTIN_CVTSS2SI,
16992 IX86_BUILTIN_CVTSS2SI64,
16993 IX86_BUILTIN_CVTTPS2PI,
16994 IX86_BUILTIN_CVTTSS2SI,
16995 IX86_BUILTIN_CVTTSS2SI64,
16997 IX86_BUILTIN_MAXPS,
16998 IX86_BUILTIN_MAXSS,
16999 IX86_BUILTIN_MINPS,
17000 IX86_BUILTIN_MINSS,
17002 IX86_BUILTIN_LOADUPS,
17003 IX86_BUILTIN_STOREUPS,
17004 IX86_BUILTIN_MOVSS,
17006 IX86_BUILTIN_MOVHLPS,
17007 IX86_BUILTIN_MOVLHPS,
17008 IX86_BUILTIN_LOADHPS,
17009 IX86_BUILTIN_LOADLPS,
17010 IX86_BUILTIN_STOREHPS,
17011 IX86_BUILTIN_STORELPS,
17013 IX86_BUILTIN_MASKMOVQ,
17014 IX86_BUILTIN_MOVMSKPS,
17015 IX86_BUILTIN_PMOVMSKB,
17017 IX86_BUILTIN_MOVNTPS,
17018 IX86_BUILTIN_MOVNTQ,
17020 IX86_BUILTIN_LOADDQU,
17021 IX86_BUILTIN_STOREDQU,
17023 IX86_BUILTIN_PACKSSWB,
17024 IX86_BUILTIN_PACKSSDW,
17025 IX86_BUILTIN_PACKUSWB,
17027 IX86_BUILTIN_PADDB,
17028 IX86_BUILTIN_PADDW,
17029 IX86_BUILTIN_PADDD,
17030 IX86_BUILTIN_PADDQ,
17031 IX86_BUILTIN_PADDSB,
17032 IX86_BUILTIN_PADDSW,
17033 IX86_BUILTIN_PADDUSB,
17034 IX86_BUILTIN_PADDUSW,
17035 IX86_BUILTIN_PSUBB,
17036 IX86_BUILTIN_PSUBW,
17037 IX86_BUILTIN_PSUBD,
17038 IX86_BUILTIN_PSUBQ,
17039 IX86_BUILTIN_PSUBSB,
17040 IX86_BUILTIN_PSUBSW,
17041 IX86_BUILTIN_PSUBUSB,
17042 IX86_BUILTIN_PSUBUSW,
17045 IX86_BUILTIN_PANDN,
17049 IX86_BUILTIN_PAVGB,
17050 IX86_BUILTIN_PAVGW,
17052 IX86_BUILTIN_PCMPEQB,
17053 IX86_BUILTIN_PCMPEQW,
17054 IX86_BUILTIN_PCMPEQD,
17055 IX86_BUILTIN_PCMPGTB,
17056 IX86_BUILTIN_PCMPGTW,
17057 IX86_BUILTIN_PCMPGTD,
17059 IX86_BUILTIN_PMADDWD,
17061 IX86_BUILTIN_PMAXSW,
17062 IX86_BUILTIN_PMAXUB,
17063 IX86_BUILTIN_PMINSW,
17064 IX86_BUILTIN_PMINUB,
17066 IX86_BUILTIN_PMULHUW,
17067 IX86_BUILTIN_PMULHW,
17068 IX86_BUILTIN_PMULLW,
17070 IX86_BUILTIN_PSADBW,
17071 IX86_BUILTIN_PSHUFW,
17073 IX86_BUILTIN_PSLLW,
17074 IX86_BUILTIN_PSLLD,
17075 IX86_BUILTIN_PSLLQ,
17076 IX86_BUILTIN_PSRAW,
17077 IX86_BUILTIN_PSRAD,
17078 IX86_BUILTIN_PSRLW,
17079 IX86_BUILTIN_PSRLD,
17080 IX86_BUILTIN_PSRLQ,
17081 IX86_BUILTIN_PSLLWI,
17082 IX86_BUILTIN_PSLLDI,
17083 IX86_BUILTIN_PSLLQI,
17084 IX86_BUILTIN_PSRAWI,
17085 IX86_BUILTIN_PSRADI,
17086 IX86_BUILTIN_PSRLWI,
17087 IX86_BUILTIN_PSRLDI,
17088 IX86_BUILTIN_PSRLQI,
17090 IX86_BUILTIN_PUNPCKHBW,
17091 IX86_BUILTIN_PUNPCKHWD,
17092 IX86_BUILTIN_PUNPCKHDQ,
17093 IX86_BUILTIN_PUNPCKLBW,
17094 IX86_BUILTIN_PUNPCKLWD,
17095 IX86_BUILTIN_PUNPCKLDQ,
17097 IX86_BUILTIN_SHUFPS,
17099 IX86_BUILTIN_RCPPS,
17100 IX86_BUILTIN_RCPSS,
17101 IX86_BUILTIN_RSQRTPS,
17102 IX86_BUILTIN_RSQRTPS_NR,
17103 IX86_BUILTIN_RSQRTSS,
17104 IX86_BUILTIN_RSQRTF,
17105 IX86_BUILTIN_SQRTPS,
17106 IX86_BUILTIN_SQRTPS_NR,
17107 IX86_BUILTIN_SQRTSS,
17109 IX86_BUILTIN_UNPCKHPS,
17110 IX86_BUILTIN_UNPCKLPS,
17112 IX86_BUILTIN_ANDPS,
17113 IX86_BUILTIN_ANDNPS,
17115 IX86_BUILTIN_XORPS,
17118 IX86_BUILTIN_LDMXCSR,
17119 IX86_BUILTIN_STMXCSR,
17120 IX86_BUILTIN_SFENCE,
17122 /* 3DNow! Original */
17123 IX86_BUILTIN_FEMMS,
17124 IX86_BUILTIN_PAVGUSB,
17125 IX86_BUILTIN_PF2ID,
17126 IX86_BUILTIN_PFACC,
17127 IX86_BUILTIN_PFADD,
17128 IX86_BUILTIN_PFCMPEQ,
17129 IX86_BUILTIN_PFCMPGE,
17130 IX86_BUILTIN_PFCMPGT,
17131 IX86_BUILTIN_PFMAX,
17132 IX86_BUILTIN_PFMIN,
17133 IX86_BUILTIN_PFMUL,
17134 IX86_BUILTIN_PFRCP,
17135 IX86_BUILTIN_PFRCPIT1,
17136 IX86_BUILTIN_PFRCPIT2,
17137 IX86_BUILTIN_PFRSQIT1,
17138 IX86_BUILTIN_PFRSQRT,
17139 IX86_BUILTIN_PFSUB,
17140 IX86_BUILTIN_PFSUBR,
17141 IX86_BUILTIN_PI2FD,
17142 IX86_BUILTIN_PMULHRW,
17144 /* 3DNow! Athlon Extensions */
17145 IX86_BUILTIN_PF2IW,
17146 IX86_BUILTIN_PFNACC,
17147 IX86_BUILTIN_PFPNACC,
17148 IX86_BUILTIN_PI2FW,
17149 IX86_BUILTIN_PSWAPDSI,
17150 IX86_BUILTIN_PSWAPDSF,
17153 IX86_BUILTIN_ADDPD,
17154 IX86_BUILTIN_ADDSD,
17155 IX86_BUILTIN_DIVPD,
17156 IX86_BUILTIN_DIVSD,
17157 IX86_BUILTIN_MULPD,
17158 IX86_BUILTIN_MULSD,
17159 IX86_BUILTIN_SUBPD,
17160 IX86_BUILTIN_SUBSD,
17162 IX86_BUILTIN_CMPEQPD,
17163 IX86_BUILTIN_CMPLTPD,
17164 IX86_BUILTIN_CMPLEPD,
17165 IX86_BUILTIN_CMPGTPD,
17166 IX86_BUILTIN_CMPGEPD,
17167 IX86_BUILTIN_CMPNEQPD,
17168 IX86_BUILTIN_CMPNLTPD,
17169 IX86_BUILTIN_CMPNLEPD,
17170 IX86_BUILTIN_CMPNGTPD,
17171 IX86_BUILTIN_CMPNGEPD,
17172 IX86_BUILTIN_CMPORDPD,
17173 IX86_BUILTIN_CMPUNORDPD,
17174 IX86_BUILTIN_CMPEQSD,
17175 IX86_BUILTIN_CMPLTSD,
17176 IX86_BUILTIN_CMPLESD,
17177 IX86_BUILTIN_CMPNEQSD,
17178 IX86_BUILTIN_CMPNLTSD,
17179 IX86_BUILTIN_CMPNLESD,
17180 IX86_BUILTIN_CMPORDSD,
17181 IX86_BUILTIN_CMPUNORDSD,
17183 IX86_BUILTIN_COMIEQSD,
17184 IX86_BUILTIN_COMILTSD,
17185 IX86_BUILTIN_COMILESD,
17186 IX86_BUILTIN_COMIGTSD,
17187 IX86_BUILTIN_COMIGESD,
17188 IX86_BUILTIN_COMINEQSD,
17189 IX86_BUILTIN_UCOMIEQSD,
17190 IX86_BUILTIN_UCOMILTSD,
17191 IX86_BUILTIN_UCOMILESD,
17192 IX86_BUILTIN_UCOMIGTSD,
17193 IX86_BUILTIN_UCOMIGESD,
17194 IX86_BUILTIN_UCOMINEQSD,
17196 IX86_BUILTIN_MAXPD,
17197 IX86_BUILTIN_MAXSD,
17198 IX86_BUILTIN_MINPD,
17199 IX86_BUILTIN_MINSD,
17201 IX86_BUILTIN_ANDPD,
17202 IX86_BUILTIN_ANDNPD,
17204 IX86_BUILTIN_XORPD,
17206 IX86_BUILTIN_SQRTPD,
17207 IX86_BUILTIN_SQRTSD,
17209 IX86_BUILTIN_UNPCKHPD,
17210 IX86_BUILTIN_UNPCKLPD,
17212 IX86_BUILTIN_SHUFPD,
17214 IX86_BUILTIN_LOADUPD,
17215 IX86_BUILTIN_STOREUPD,
17216 IX86_BUILTIN_MOVSD,
17218 IX86_BUILTIN_LOADHPD,
17219 IX86_BUILTIN_LOADLPD,
17221 IX86_BUILTIN_CVTDQ2PD,
17222 IX86_BUILTIN_CVTDQ2PS,
17224 IX86_BUILTIN_CVTPD2DQ,
17225 IX86_BUILTIN_CVTPD2PI,
17226 IX86_BUILTIN_CVTPD2PS,
17227 IX86_BUILTIN_CVTTPD2DQ,
17228 IX86_BUILTIN_CVTTPD2PI,
17230 IX86_BUILTIN_CVTPI2PD,
17231 IX86_BUILTIN_CVTSI2SD,
17232 IX86_BUILTIN_CVTSI642SD,
17234 IX86_BUILTIN_CVTSD2SI,
17235 IX86_BUILTIN_CVTSD2SI64,
17236 IX86_BUILTIN_CVTSD2SS,
17237 IX86_BUILTIN_CVTSS2SD,
17238 IX86_BUILTIN_CVTTSD2SI,
17239 IX86_BUILTIN_CVTTSD2SI64,
17241 IX86_BUILTIN_CVTPS2DQ,
17242 IX86_BUILTIN_CVTPS2PD,
17243 IX86_BUILTIN_CVTTPS2DQ,
17245 IX86_BUILTIN_MOVNTI,
17246 IX86_BUILTIN_MOVNTPD,
17247 IX86_BUILTIN_MOVNTDQ,
17250 IX86_BUILTIN_MASKMOVDQU,
17251 IX86_BUILTIN_MOVMSKPD,
17252 IX86_BUILTIN_PMOVMSKB128,
17254 IX86_BUILTIN_PACKSSWB128,
17255 IX86_BUILTIN_PACKSSDW128,
17256 IX86_BUILTIN_PACKUSWB128,
17258 IX86_BUILTIN_PADDB128,
17259 IX86_BUILTIN_PADDW128,
17260 IX86_BUILTIN_PADDD128,
17261 IX86_BUILTIN_PADDQ128,
17262 IX86_BUILTIN_PADDSB128,
17263 IX86_BUILTIN_PADDSW128,
17264 IX86_BUILTIN_PADDUSB128,
17265 IX86_BUILTIN_PADDUSW128,
17266 IX86_BUILTIN_PSUBB128,
17267 IX86_BUILTIN_PSUBW128,
17268 IX86_BUILTIN_PSUBD128,
17269 IX86_BUILTIN_PSUBQ128,
17270 IX86_BUILTIN_PSUBSB128,
17271 IX86_BUILTIN_PSUBSW128,
17272 IX86_BUILTIN_PSUBUSB128,
17273 IX86_BUILTIN_PSUBUSW128,
17275 IX86_BUILTIN_PAND128,
17276 IX86_BUILTIN_PANDN128,
17277 IX86_BUILTIN_POR128,
17278 IX86_BUILTIN_PXOR128,
17280 IX86_BUILTIN_PAVGB128,
17281 IX86_BUILTIN_PAVGW128,
17283 IX86_BUILTIN_PCMPEQB128,
17284 IX86_BUILTIN_PCMPEQW128,
17285 IX86_BUILTIN_PCMPEQD128,
17286 IX86_BUILTIN_PCMPGTB128,
17287 IX86_BUILTIN_PCMPGTW128,
17288 IX86_BUILTIN_PCMPGTD128,
17290 IX86_BUILTIN_PMADDWD128,
17292 IX86_BUILTIN_PMAXSW128,
17293 IX86_BUILTIN_PMAXUB128,
17294 IX86_BUILTIN_PMINSW128,
17295 IX86_BUILTIN_PMINUB128,
17297 IX86_BUILTIN_PMULUDQ,
17298 IX86_BUILTIN_PMULUDQ128,
17299 IX86_BUILTIN_PMULHUW128,
17300 IX86_BUILTIN_PMULHW128,
17301 IX86_BUILTIN_PMULLW128,
17303 IX86_BUILTIN_PSADBW128,
17304 IX86_BUILTIN_PSHUFHW,
17305 IX86_BUILTIN_PSHUFLW,
17306 IX86_BUILTIN_PSHUFD,
17308 IX86_BUILTIN_PSLLDQI128,
17309 IX86_BUILTIN_PSLLWI128,
17310 IX86_BUILTIN_PSLLDI128,
17311 IX86_BUILTIN_PSLLQI128,
17312 IX86_BUILTIN_PSRAWI128,
17313 IX86_BUILTIN_PSRADI128,
17314 IX86_BUILTIN_PSRLDQI128,
17315 IX86_BUILTIN_PSRLWI128,
17316 IX86_BUILTIN_PSRLDI128,
17317 IX86_BUILTIN_PSRLQI128,
17319 IX86_BUILTIN_PSLLDQ128,
17320 IX86_BUILTIN_PSLLW128,
17321 IX86_BUILTIN_PSLLD128,
17322 IX86_BUILTIN_PSLLQ128,
17323 IX86_BUILTIN_PSRAW128,
17324 IX86_BUILTIN_PSRAD128,
17325 IX86_BUILTIN_PSRLW128,
17326 IX86_BUILTIN_PSRLD128,
17327 IX86_BUILTIN_PSRLQ128,
17329 IX86_BUILTIN_PUNPCKHBW128,
17330 IX86_BUILTIN_PUNPCKHWD128,
17331 IX86_BUILTIN_PUNPCKHDQ128,
17332 IX86_BUILTIN_PUNPCKHQDQ128,
17333 IX86_BUILTIN_PUNPCKLBW128,
17334 IX86_BUILTIN_PUNPCKLWD128,
17335 IX86_BUILTIN_PUNPCKLDQ128,
17336 IX86_BUILTIN_PUNPCKLQDQ128,
17338 IX86_BUILTIN_CLFLUSH,
17339 IX86_BUILTIN_MFENCE,
17340 IX86_BUILTIN_LFENCE,
17342 /* Prescott New Instructions. */
17343 IX86_BUILTIN_ADDSUBPS,
17344 IX86_BUILTIN_HADDPS,
17345 IX86_BUILTIN_HSUBPS,
17346 IX86_BUILTIN_MOVSHDUP,
17347 IX86_BUILTIN_MOVSLDUP,
17348 IX86_BUILTIN_ADDSUBPD,
17349 IX86_BUILTIN_HADDPD,
17350 IX86_BUILTIN_HSUBPD,
17351 IX86_BUILTIN_LDDQU,
17353 IX86_BUILTIN_MONITOR,
17354 IX86_BUILTIN_MWAIT,
17357 IX86_BUILTIN_PHADDW,
17358 IX86_BUILTIN_PHADDD,
17359 IX86_BUILTIN_PHADDSW,
17360 IX86_BUILTIN_PHSUBW,
17361 IX86_BUILTIN_PHSUBD,
17362 IX86_BUILTIN_PHSUBSW,
17363 IX86_BUILTIN_PMADDUBSW,
17364 IX86_BUILTIN_PMULHRSW,
17365 IX86_BUILTIN_PSHUFB,
17366 IX86_BUILTIN_PSIGNB,
17367 IX86_BUILTIN_PSIGNW,
17368 IX86_BUILTIN_PSIGND,
17369 IX86_BUILTIN_PALIGNR,
17370 IX86_BUILTIN_PABSB,
17371 IX86_BUILTIN_PABSW,
17372 IX86_BUILTIN_PABSD,
17374 IX86_BUILTIN_PHADDW128,
17375 IX86_BUILTIN_PHADDD128,
17376 IX86_BUILTIN_PHADDSW128,
17377 IX86_BUILTIN_PHSUBW128,
17378 IX86_BUILTIN_PHSUBD128,
17379 IX86_BUILTIN_PHSUBSW128,
17380 IX86_BUILTIN_PMADDUBSW128,
17381 IX86_BUILTIN_PMULHRSW128,
17382 IX86_BUILTIN_PSHUFB128,
17383 IX86_BUILTIN_PSIGNB128,
17384 IX86_BUILTIN_PSIGNW128,
17385 IX86_BUILTIN_PSIGND128,
17386 IX86_BUILTIN_PALIGNR128,
17387 IX86_BUILTIN_PABSB128,
17388 IX86_BUILTIN_PABSW128,
17389 IX86_BUILTIN_PABSD128,
17391 /* AMDFAM10 - SSE4A New Instructions. */
17392 IX86_BUILTIN_MOVNTSD,
17393 IX86_BUILTIN_MOVNTSS,
17394 IX86_BUILTIN_EXTRQI,
17395 IX86_BUILTIN_EXTRQ,
17396 IX86_BUILTIN_INSERTQI,
17397 IX86_BUILTIN_INSERTQ,
17400 IX86_BUILTIN_BLENDPD,
17401 IX86_BUILTIN_BLENDPS,
17402 IX86_BUILTIN_BLENDVPD,
17403 IX86_BUILTIN_BLENDVPS,
17404 IX86_BUILTIN_PBLENDVB128,
17405 IX86_BUILTIN_PBLENDW128,
17410 IX86_BUILTIN_INSERTPS128,
17412 IX86_BUILTIN_MOVNTDQA,
17413 IX86_BUILTIN_MPSADBW128,
17414 IX86_BUILTIN_PACKUSDW128,
17415 IX86_BUILTIN_PCMPEQQ,
17416 IX86_BUILTIN_PHMINPOSUW128,
17418 IX86_BUILTIN_PMAXSB128,
17419 IX86_BUILTIN_PMAXSD128,
17420 IX86_BUILTIN_PMAXUD128,
17421 IX86_BUILTIN_PMAXUW128,
17423 IX86_BUILTIN_PMINSB128,
17424 IX86_BUILTIN_PMINSD128,
17425 IX86_BUILTIN_PMINUD128,
17426 IX86_BUILTIN_PMINUW128,
17428 IX86_BUILTIN_PMOVSXBW128,
17429 IX86_BUILTIN_PMOVSXBD128,
17430 IX86_BUILTIN_PMOVSXBQ128,
17431 IX86_BUILTIN_PMOVSXWD128,
17432 IX86_BUILTIN_PMOVSXWQ128,
17433 IX86_BUILTIN_PMOVSXDQ128,
17435 IX86_BUILTIN_PMOVZXBW128,
17436 IX86_BUILTIN_PMOVZXBD128,
17437 IX86_BUILTIN_PMOVZXBQ128,
17438 IX86_BUILTIN_PMOVZXWD128,
17439 IX86_BUILTIN_PMOVZXWQ128,
17440 IX86_BUILTIN_PMOVZXDQ128,
17442 IX86_BUILTIN_PMULDQ128,
17443 IX86_BUILTIN_PMULLD128,
17445 IX86_BUILTIN_ROUNDPD,
17446 IX86_BUILTIN_ROUNDPS,
17447 IX86_BUILTIN_ROUNDSD,
17448 IX86_BUILTIN_ROUNDSS,
17450 IX86_BUILTIN_PTESTZ,
17451 IX86_BUILTIN_PTESTC,
17452 IX86_BUILTIN_PTESTNZC,
17454 IX86_BUILTIN_VEC_INIT_V2SI,
17455 IX86_BUILTIN_VEC_INIT_V4HI,
17456 IX86_BUILTIN_VEC_INIT_V8QI,
17457 IX86_BUILTIN_VEC_EXT_V2DF,
17458 IX86_BUILTIN_VEC_EXT_V2DI,
17459 IX86_BUILTIN_VEC_EXT_V4SF,
17460 IX86_BUILTIN_VEC_EXT_V4SI,
17461 IX86_BUILTIN_VEC_EXT_V8HI,
17462 IX86_BUILTIN_VEC_EXT_V2SI,
17463 IX86_BUILTIN_VEC_EXT_V4HI,
17464 IX86_BUILTIN_VEC_EXT_V16QI,
17465 IX86_BUILTIN_VEC_SET_V2DI,
17466 IX86_BUILTIN_VEC_SET_V4SF,
17467 IX86_BUILTIN_VEC_SET_V4SI,
17468 IX86_BUILTIN_VEC_SET_V8HI,
17469 IX86_BUILTIN_VEC_SET_V4HI,
17470 IX86_BUILTIN_VEC_SET_V16QI,
17472 IX86_BUILTIN_VEC_PACK_SFIX,
17475 IX86_BUILTIN_CRC32QI,
17476 IX86_BUILTIN_CRC32HI,
17477 IX86_BUILTIN_CRC32SI,
17478 IX86_BUILTIN_CRC32DI,
17480 IX86_BUILTIN_PCMPESTRI128,
17481 IX86_BUILTIN_PCMPESTRM128,
17482 IX86_BUILTIN_PCMPESTRA128,
17483 IX86_BUILTIN_PCMPESTRC128,
17484 IX86_BUILTIN_PCMPESTRO128,
17485 IX86_BUILTIN_PCMPESTRS128,
17486 IX86_BUILTIN_PCMPESTRZ128,
17487 IX86_BUILTIN_PCMPISTRI128,
17488 IX86_BUILTIN_PCMPISTRM128,
17489 IX86_BUILTIN_PCMPISTRA128,
17490 IX86_BUILTIN_PCMPISTRC128,
17491 IX86_BUILTIN_PCMPISTRO128,
17492 IX86_BUILTIN_PCMPISTRS128,
17493 IX86_BUILTIN_PCMPISTRZ128,
17495 IX86_BUILTIN_PCMPGTQ,
17497 /* TFmode support builtins. */
17499 IX86_BUILTIN_FABSQ,
17500 IX86_BUILTIN_COPYSIGNQ,
17502 /* SSE5 instructions */
17503 IX86_BUILTIN_FMADDSS,
17504 IX86_BUILTIN_FMADDSD,
17505 IX86_BUILTIN_FMADDPS,
17506 IX86_BUILTIN_FMADDPD,
17507 IX86_BUILTIN_FMSUBSS,
17508 IX86_BUILTIN_FMSUBSD,
17509 IX86_BUILTIN_FMSUBPS,
17510 IX86_BUILTIN_FMSUBPD,
17511 IX86_BUILTIN_FNMADDSS,
17512 IX86_BUILTIN_FNMADDSD,
17513 IX86_BUILTIN_FNMADDPS,
17514 IX86_BUILTIN_FNMADDPD,
17515 IX86_BUILTIN_FNMSUBSS,
17516 IX86_BUILTIN_FNMSUBSD,
17517 IX86_BUILTIN_FNMSUBPS,
17518 IX86_BUILTIN_FNMSUBPD,
17519 IX86_BUILTIN_PCMOV_V2DI,
17520 IX86_BUILTIN_PCMOV_V4SI,
17521 IX86_BUILTIN_PCMOV_V8HI,
17522 IX86_BUILTIN_PCMOV_V16QI,
17523 IX86_BUILTIN_PCMOV_V4SF,
17524 IX86_BUILTIN_PCMOV_V2DF,
17525 IX86_BUILTIN_PPERM,
17526 IX86_BUILTIN_PERMPS,
17527 IX86_BUILTIN_PERMPD,
17528 IX86_BUILTIN_PMACSSWW,
17529 IX86_BUILTIN_PMACSWW,
17530 IX86_BUILTIN_PMACSSWD,
17531 IX86_BUILTIN_PMACSWD,
17532 IX86_BUILTIN_PMACSSDD,
17533 IX86_BUILTIN_PMACSDD,
17534 IX86_BUILTIN_PMACSSDQL,
17535 IX86_BUILTIN_PMACSSDQH,
17536 IX86_BUILTIN_PMACSDQL,
17537 IX86_BUILTIN_PMACSDQH,
17538 IX86_BUILTIN_PMADCSSWD,
17539 IX86_BUILTIN_PMADCSWD,
17540 IX86_BUILTIN_PHADDBW,
17541 IX86_BUILTIN_PHADDBD,
17542 IX86_BUILTIN_PHADDBQ,
17543 IX86_BUILTIN_PHADDWD,
17544 IX86_BUILTIN_PHADDWQ,
17545 IX86_BUILTIN_PHADDDQ,
17546 IX86_BUILTIN_PHADDUBW,
17547 IX86_BUILTIN_PHADDUBD,
17548 IX86_BUILTIN_PHADDUBQ,
17549 IX86_BUILTIN_PHADDUWD,
17550 IX86_BUILTIN_PHADDUWQ,
17551 IX86_BUILTIN_PHADDUDQ,
17552 IX86_BUILTIN_PHSUBBW,
17553 IX86_BUILTIN_PHSUBWD,
17554 IX86_BUILTIN_PHSUBDQ,
17555 IX86_BUILTIN_PROTB,
17556 IX86_BUILTIN_PROTW,
17557 IX86_BUILTIN_PROTD,
17558 IX86_BUILTIN_PROTQ,
17559 IX86_BUILTIN_PROTB_IMM,
17560 IX86_BUILTIN_PROTW_IMM,
17561 IX86_BUILTIN_PROTD_IMM,
17562 IX86_BUILTIN_PROTQ_IMM,
17563 IX86_BUILTIN_PSHLB,
17564 IX86_BUILTIN_PSHLW,
17565 IX86_BUILTIN_PSHLD,
17566 IX86_BUILTIN_PSHLQ,
17567 IX86_BUILTIN_PSHAB,
17568 IX86_BUILTIN_PSHAW,
17569 IX86_BUILTIN_PSHAD,
17570 IX86_BUILTIN_PSHAQ,
17571 IX86_BUILTIN_FRCZSS,
17572 IX86_BUILTIN_FRCZSD,
17573 IX86_BUILTIN_FRCZPS,
17574 IX86_BUILTIN_FRCZPD,
17575 IX86_BUILTIN_CVTPH2PS,
17576 IX86_BUILTIN_CVTPS2PH,
17578 IX86_BUILTIN_COMEQSS,
17579 IX86_BUILTIN_COMNESS,
17580 IX86_BUILTIN_COMLTSS,
17581 IX86_BUILTIN_COMLESS,
17582 IX86_BUILTIN_COMGTSS,
17583 IX86_BUILTIN_COMGESS,
17584 IX86_BUILTIN_COMUEQSS,
17585 IX86_BUILTIN_COMUNESS,
17586 IX86_BUILTIN_COMULTSS,
17587 IX86_BUILTIN_COMULESS,
17588 IX86_BUILTIN_COMUGTSS,
17589 IX86_BUILTIN_COMUGESS,
17590 IX86_BUILTIN_COMORDSS,
17591 IX86_BUILTIN_COMUNORDSS,
17592 IX86_BUILTIN_COMFALSESS,
17593 IX86_BUILTIN_COMTRUESS,
17595 IX86_BUILTIN_COMEQSD,
17596 IX86_BUILTIN_COMNESD,
17597 IX86_BUILTIN_COMLTSD,
17598 IX86_BUILTIN_COMLESD,
17599 IX86_BUILTIN_COMGTSD,
17600 IX86_BUILTIN_COMGESD,
17601 IX86_BUILTIN_COMUEQSD,
17602 IX86_BUILTIN_COMUNESD,
17603 IX86_BUILTIN_COMULTSD,
17604 IX86_BUILTIN_COMULESD,
17605 IX86_BUILTIN_COMUGTSD,
17606 IX86_BUILTIN_COMUGESD,
17607 IX86_BUILTIN_COMORDSD,
17608 IX86_BUILTIN_COMUNORDSD,
17609 IX86_BUILTIN_COMFALSESD,
17610 IX86_BUILTIN_COMTRUESD,
17612 IX86_BUILTIN_COMEQPS,
17613 IX86_BUILTIN_COMNEPS,
17614 IX86_BUILTIN_COMLTPS,
17615 IX86_BUILTIN_COMLEPS,
17616 IX86_BUILTIN_COMGTPS,
17617 IX86_BUILTIN_COMGEPS,
17618 IX86_BUILTIN_COMUEQPS,
17619 IX86_BUILTIN_COMUNEPS,
17620 IX86_BUILTIN_COMULTPS,
17621 IX86_BUILTIN_COMULEPS,
17622 IX86_BUILTIN_COMUGTPS,
17623 IX86_BUILTIN_COMUGEPS,
17624 IX86_BUILTIN_COMORDPS,
17625 IX86_BUILTIN_COMUNORDPS,
17626 IX86_BUILTIN_COMFALSEPS,
17627 IX86_BUILTIN_COMTRUEPS,
17629 IX86_BUILTIN_COMEQPD,
17630 IX86_BUILTIN_COMNEPD,
17631 IX86_BUILTIN_COMLTPD,
17632 IX86_BUILTIN_COMLEPD,
17633 IX86_BUILTIN_COMGTPD,
17634 IX86_BUILTIN_COMGEPD,
17635 IX86_BUILTIN_COMUEQPD,
17636 IX86_BUILTIN_COMUNEPD,
17637 IX86_BUILTIN_COMULTPD,
17638 IX86_BUILTIN_COMULEPD,
17639 IX86_BUILTIN_COMUGTPD,
17640 IX86_BUILTIN_COMUGEPD,
17641 IX86_BUILTIN_COMORDPD,
17642 IX86_BUILTIN_COMUNORDPD,
17643 IX86_BUILTIN_COMFALSEPD,
17644 IX86_BUILTIN_COMTRUEPD,
17646 IX86_BUILTIN_PCOMEQUB,
17647 IX86_BUILTIN_PCOMNEUB,
17648 IX86_BUILTIN_PCOMLTUB,
17649 IX86_BUILTIN_PCOMLEUB,
17650 IX86_BUILTIN_PCOMGTUB,
17651 IX86_BUILTIN_PCOMGEUB,
17652 IX86_BUILTIN_PCOMFALSEUB,
17653 IX86_BUILTIN_PCOMTRUEUB,
17654 IX86_BUILTIN_PCOMEQUW,
17655 IX86_BUILTIN_PCOMNEUW,
17656 IX86_BUILTIN_PCOMLTUW,
17657 IX86_BUILTIN_PCOMLEUW,
17658 IX86_BUILTIN_PCOMGTUW,
17659 IX86_BUILTIN_PCOMGEUW,
17660 IX86_BUILTIN_PCOMFALSEUW,
17661 IX86_BUILTIN_PCOMTRUEUW,
17662 IX86_BUILTIN_PCOMEQUD,
17663 IX86_BUILTIN_PCOMNEUD,
17664 IX86_BUILTIN_PCOMLTUD,
17665 IX86_BUILTIN_PCOMLEUD,
17666 IX86_BUILTIN_PCOMGTUD,
17667 IX86_BUILTIN_PCOMGEUD,
17668 IX86_BUILTIN_PCOMFALSEUD,
17669 IX86_BUILTIN_PCOMTRUEUD,
17670 IX86_BUILTIN_PCOMEQUQ,
17671 IX86_BUILTIN_PCOMNEUQ,
17672 IX86_BUILTIN_PCOMLTUQ,
17673 IX86_BUILTIN_PCOMLEUQ,
17674 IX86_BUILTIN_PCOMGTUQ,
17675 IX86_BUILTIN_PCOMGEUQ,
17676 IX86_BUILTIN_PCOMFALSEUQ,
17677 IX86_BUILTIN_PCOMTRUEUQ,
17679 IX86_BUILTIN_PCOMEQB,
17680 IX86_BUILTIN_PCOMNEB,
17681 IX86_BUILTIN_PCOMLTB,
17682 IX86_BUILTIN_PCOMLEB,
17683 IX86_BUILTIN_PCOMGTB,
17684 IX86_BUILTIN_PCOMGEB,
17685 IX86_BUILTIN_PCOMFALSEB,
17686 IX86_BUILTIN_PCOMTRUEB,
17687 IX86_BUILTIN_PCOMEQW,
17688 IX86_BUILTIN_PCOMNEW,
17689 IX86_BUILTIN_PCOMLTW,
17690 IX86_BUILTIN_PCOMLEW,
17691 IX86_BUILTIN_PCOMGTW,
17692 IX86_BUILTIN_PCOMGEW,
17693 IX86_BUILTIN_PCOMFALSEW,
17694 IX86_BUILTIN_PCOMTRUEW,
17695 IX86_BUILTIN_PCOMEQD,
17696 IX86_BUILTIN_PCOMNED,
17697 IX86_BUILTIN_PCOMLTD,
17698 IX86_BUILTIN_PCOMLED,
17699 IX86_BUILTIN_PCOMGTD,
17700 IX86_BUILTIN_PCOMGED,
17701 IX86_BUILTIN_PCOMFALSED,
17702 IX86_BUILTIN_PCOMTRUED,
17703 IX86_BUILTIN_PCOMEQQ,
17704 IX86_BUILTIN_PCOMNEQ,
17705 IX86_BUILTIN_PCOMLTQ,
17706 IX86_BUILTIN_PCOMLEQ,
17707 IX86_BUILTIN_PCOMGTQ,
17708 IX86_BUILTIN_PCOMGEQ,
17709 IX86_BUILTIN_PCOMFALSEQ,
17710 IX86_BUILTIN_PCOMTRUEQ,
17715 /* Table for the ix86 builtin decls. */
17716 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17718 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17719 * if the target_flags include one of MASK. Stores the function decl
17720 * in the ix86_builtins array.
17721 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17724 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17726 tree decl = NULL_TREE;
17728 if (mask & ix86_isa_flags
17729 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17731 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17733 ix86_builtins[(int) code] = decl;
17739 /* Like def_builtin, but also marks the function decl "const". */
17742 def_builtin_const (int mask, const char *name, tree type,
17743 enum ix86_builtins code)
17745 tree decl = def_builtin (mask, name, type, code);
17747 TREE_READONLY (decl) = 1;
17751 /* Bits for builtin_description.flag. */
17753 /* Set when we don't support the comparison natively, and should
17754 swap_comparison in order to support it. */
17755 #define BUILTIN_DESC_SWAP_OPERANDS 1
17757 struct builtin_description
17759 const unsigned int mask;
17760 const enum insn_code icode;
17761 const char *const name;
17762 const enum ix86_builtins code;
17763 const enum rtx_code comparison;
17767 static const struct builtin_description bdesc_comi[] =
17769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17771 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17775 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17776 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17777 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17778 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17779 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17780 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17789 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17790 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17792 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17795 static const struct builtin_description bdesc_ptest[] =
17798 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17799 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17800 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17803 static const struct builtin_description bdesc_pcmpestr[] =
17806 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17807 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17808 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17809 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17810 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17811 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17812 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17815 static const struct builtin_description bdesc_pcmpistr[] =
17818 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17819 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17820 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17821 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17822 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17823 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17824 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17827 static const struct builtin_description bdesc_crc32[] =
17830 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17831 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17832 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17833 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17836 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
17837 static const struct builtin_description bdesc_sse_3arg[] =
17840 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17841 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17842 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17843 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17844 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17845 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17846 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17847 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17848 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17849 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17850 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17851 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17854 static const struct builtin_description bdesc_2arg[] =
17857 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17858 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17859 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17860 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17861 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17862 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17863 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17864 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17866 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17881 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17884 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17885 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17886 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17887 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17889 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17890 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17894 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17896 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17897 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17906 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17907 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17908 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17909 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17910 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17911 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17912 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17913 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17915 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17917 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17918 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17919 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17920 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17921 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17922 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17924 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17925 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17926 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17929 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17930 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17933 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17934 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17939 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17940 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17941 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17943 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17944 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17945 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17946 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17949 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17956 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17958 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17960 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17961 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17962 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17967 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17969 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17979 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17983 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17984 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17987 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17988 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17989 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17990 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
18000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
18001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
18002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
18003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
18004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
18005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
18008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
18009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
18010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
18011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
18012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
18013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
18014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
18015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
18017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
18018 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
18019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
18020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
18022 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
18023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
18024 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
18025 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
18027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
18028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
18029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
18031 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
18034 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
18035 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
18036 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
18037 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
18038 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
18039 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
18040 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
18041 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
18043 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
18044 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
18045 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
18046 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
18047 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
18048 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
18049 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
18050 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
18052 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
18053 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
18055 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
18056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
18057 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
18058 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
18060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
18061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
18063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
18064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
18065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
18066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
18067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
18068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
18070 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
18071 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
18072 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
18073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
18075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
18076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
18077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
18078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
18080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
18081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
18082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
18084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
18085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
18086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
18088 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
18089 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
18091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
18092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
18094 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
18095 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
18096 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
18098 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
18099 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
18100 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
18102 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
18103 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
18105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
18107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
18108 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
18109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
18110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
18113 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
18114 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
18115 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
18116 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
18117 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
18118 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
18121 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
18122 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
18123 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
18124 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
18125 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
18126 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
18127 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
18128 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
18129 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
18130 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
18131 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
18132 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18133 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18134 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18135 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18136 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18137 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18138 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18139 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18140 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18141 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18142 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18143 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18144 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18147 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18148 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18149 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18150 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18151 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18152 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18153 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18154 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18155 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18156 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18157 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18158 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18161 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18164 static const struct builtin_description bdesc_1arg[] =
18166 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18167 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18169 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18170 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 },
18171 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18172 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 },
18173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18175 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18176 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18177 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18178 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18179 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18180 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18200 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18201 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18208 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18209 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18212 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18213 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18214 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18215 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18216 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18217 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18220 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18221 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18222 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18223 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18224 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18225 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18226 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18227 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18228 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18229 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18230 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18231 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18232 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18234 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
18235 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18236 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
18240 enum multi_arg_type {
18250 MULTI_ARG_3_PERMPS,
18251 MULTI_ARG_3_PERMPD,
18258 MULTI_ARG_2_DI_IMM,
18259 MULTI_ARG_2_SI_IMM,
18260 MULTI_ARG_2_HI_IMM,
18261 MULTI_ARG_2_QI_IMM,
18262 MULTI_ARG_2_SF_CMP,
18263 MULTI_ARG_2_DF_CMP,
18264 MULTI_ARG_2_DI_CMP,
18265 MULTI_ARG_2_SI_CMP,
18266 MULTI_ARG_2_HI_CMP,
18267 MULTI_ARG_2_QI_CMP,
18290 static const struct builtin_description bdesc_multi_arg[] =
18292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18334 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18335 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18336 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18337 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18353 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18477 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18496 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18497 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18498 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18502 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18523 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18524 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18525 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18528 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18529 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18532 ix86_init_mmx_sse_builtins (void)
18534 const struct builtin_description * d;
18537 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18538 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18539 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18540 tree V2DI_type_node
18541 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18542 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18543 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18544 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18545 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18546 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18547 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18549 tree pchar_type_node = build_pointer_type (char_type_node);
18550 tree pcchar_type_node = build_pointer_type (
18551 build_type_variant (char_type_node, 1, 0));
18552 tree pfloat_type_node = build_pointer_type (float_type_node);
18553 tree pcfloat_type_node = build_pointer_type (
18554 build_type_variant (float_type_node, 1, 0));
18555 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18556 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18557 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18560 tree int_ftype_v4sf_v4sf
18561 = build_function_type_list (integer_type_node,
18562 V4SF_type_node, V4SF_type_node, NULL_TREE);
18563 tree v4si_ftype_v4sf_v4sf
18564 = build_function_type_list (V4SI_type_node,
18565 V4SF_type_node, V4SF_type_node, NULL_TREE);
18566 /* MMX/SSE/integer conversions. */
18567 tree int_ftype_v4sf
18568 = build_function_type_list (integer_type_node,
18569 V4SF_type_node, NULL_TREE);
18570 tree int64_ftype_v4sf
18571 = build_function_type_list (long_long_integer_type_node,
18572 V4SF_type_node, NULL_TREE);
18573 tree int_ftype_v8qi
18574 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18575 tree v4sf_ftype_v4sf_int
18576 = build_function_type_list (V4SF_type_node,
18577 V4SF_type_node, integer_type_node, NULL_TREE);
18578 tree v4sf_ftype_v4sf_int64
18579 = build_function_type_list (V4SF_type_node,
18580 V4SF_type_node, long_long_integer_type_node,
18582 tree v4sf_ftype_v4sf_v2si
18583 = build_function_type_list (V4SF_type_node,
18584 V4SF_type_node, V2SI_type_node, NULL_TREE);
18586 /* Miscellaneous. */
18587 tree v8qi_ftype_v4hi_v4hi
18588 = build_function_type_list (V8QI_type_node,
18589 V4HI_type_node, V4HI_type_node, NULL_TREE);
18590 tree v4hi_ftype_v2si_v2si
18591 = build_function_type_list (V4HI_type_node,
18592 V2SI_type_node, V2SI_type_node, NULL_TREE);
18593 tree v4sf_ftype_v4sf_v4sf_int
18594 = build_function_type_list (V4SF_type_node,
18595 V4SF_type_node, V4SF_type_node,
18596 integer_type_node, NULL_TREE);
18597 tree v2si_ftype_v4hi_v4hi
18598 = build_function_type_list (V2SI_type_node,
18599 V4HI_type_node, V4HI_type_node, NULL_TREE);
18600 tree v4hi_ftype_v4hi_int
18601 = build_function_type_list (V4HI_type_node,
18602 V4HI_type_node, integer_type_node, NULL_TREE);
18603 tree v4hi_ftype_v4hi_di
18604 = build_function_type_list (V4HI_type_node,
18605 V4HI_type_node, long_long_unsigned_type_node,
18607 tree v2si_ftype_v2si_di
18608 = build_function_type_list (V2SI_type_node,
18609 V2SI_type_node, long_long_unsigned_type_node,
18611 tree void_ftype_void
18612 = build_function_type (void_type_node, void_list_node);
18613 tree void_ftype_unsigned
18614 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18615 tree void_ftype_unsigned_unsigned
18616 = build_function_type_list (void_type_node, unsigned_type_node,
18617 unsigned_type_node, NULL_TREE);
18618 tree void_ftype_pcvoid_unsigned_unsigned
18619 = build_function_type_list (void_type_node, const_ptr_type_node,
18620 unsigned_type_node, unsigned_type_node,
18622 tree unsigned_ftype_void
18623 = build_function_type (unsigned_type_node, void_list_node);
18624 tree v2si_ftype_v4sf
18625 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18626 /* Loads/stores. */
18627 tree void_ftype_v8qi_v8qi_pchar
18628 = build_function_type_list (void_type_node,
18629 V8QI_type_node, V8QI_type_node,
18630 pchar_type_node, NULL_TREE);
18631 tree v4sf_ftype_pcfloat
18632 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18633 /* @@@ the type is bogus */
18634 tree v4sf_ftype_v4sf_pv2si
18635 = build_function_type_list (V4SF_type_node,
18636 V4SF_type_node, pv2si_type_node, NULL_TREE);
18637 tree void_ftype_pv2si_v4sf
18638 = build_function_type_list (void_type_node,
18639 pv2si_type_node, V4SF_type_node, NULL_TREE);
18640 tree void_ftype_pfloat_v4sf
18641 = build_function_type_list (void_type_node,
18642 pfloat_type_node, V4SF_type_node, NULL_TREE);
18643 tree void_ftype_pdi_di
18644 = build_function_type_list (void_type_node,
18645 pdi_type_node, long_long_unsigned_type_node,
18647 tree void_ftype_pv2di_v2di
18648 = build_function_type_list (void_type_node,
18649 pv2di_type_node, V2DI_type_node, NULL_TREE);
18650 /* Normal vector unops. */
18651 tree v4sf_ftype_v4sf
18652 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18653 tree v16qi_ftype_v16qi
18654 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18655 tree v8hi_ftype_v8hi
18656 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18657 tree v4si_ftype_v4si
18658 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18659 tree v8qi_ftype_v8qi
18660 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18661 tree v4hi_ftype_v4hi
18662 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18664 /* Normal vector binops. */
18665 tree v4sf_ftype_v4sf_v4sf
18666 = build_function_type_list (V4SF_type_node,
18667 V4SF_type_node, V4SF_type_node, NULL_TREE);
18668 tree v8qi_ftype_v8qi_v8qi
18669 = build_function_type_list (V8QI_type_node,
18670 V8QI_type_node, V8QI_type_node, NULL_TREE);
18671 tree v4hi_ftype_v4hi_v4hi
18672 = build_function_type_list (V4HI_type_node,
18673 V4HI_type_node, V4HI_type_node, NULL_TREE);
18674 tree v2si_ftype_v2si_v2si
18675 = build_function_type_list (V2SI_type_node,
18676 V2SI_type_node, V2SI_type_node, NULL_TREE);
18677 tree di_ftype_di_di
18678 = build_function_type_list (long_long_unsigned_type_node,
18679 long_long_unsigned_type_node,
18680 long_long_unsigned_type_node, NULL_TREE);
18682 tree di_ftype_di_di_int
18683 = build_function_type_list (long_long_unsigned_type_node,
18684 long_long_unsigned_type_node,
18685 long_long_unsigned_type_node,
18686 integer_type_node, NULL_TREE);
18688 tree v2si_ftype_v2sf
18689 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18690 tree v2sf_ftype_v2si
18691 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18692 tree v2si_ftype_v2si
18693 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18694 tree v2sf_ftype_v2sf
18695 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18696 tree v2sf_ftype_v2sf_v2sf
18697 = build_function_type_list (V2SF_type_node,
18698 V2SF_type_node, V2SF_type_node, NULL_TREE);
18699 tree v2si_ftype_v2sf_v2sf
18700 = build_function_type_list (V2SI_type_node,
18701 V2SF_type_node, V2SF_type_node, NULL_TREE);
18702 tree pint_type_node = build_pointer_type (integer_type_node);
18703 tree pdouble_type_node = build_pointer_type (double_type_node);
18704 tree pcdouble_type_node = build_pointer_type (
18705 build_type_variant (double_type_node, 1, 0));
18706 tree int_ftype_v2df_v2df
18707 = build_function_type_list (integer_type_node,
18708 V2DF_type_node, V2DF_type_node, NULL_TREE);
18710 tree void_ftype_pcvoid
18711 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18712 tree v4sf_ftype_v4si
18713 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18714 tree v4si_ftype_v4sf
18715 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18716 tree v2df_ftype_v4si
18717 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18718 tree v4si_ftype_v2df
18719 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18720 tree v4si_ftype_v2df_v2df
18721 = build_function_type_list (V4SI_type_node,
18722 V2DF_type_node, V2DF_type_node, NULL_TREE);
18723 tree v2si_ftype_v2df
18724 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18725 tree v4sf_ftype_v2df
18726 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18727 tree v2df_ftype_v2si
18728 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18729 tree v2df_ftype_v4sf
18730 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18731 tree int_ftype_v2df
18732 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18733 tree int64_ftype_v2df
18734 = build_function_type_list (long_long_integer_type_node,
18735 V2DF_type_node, NULL_TREE);
18736 tree v2df_ftype_v2df_int
18737 = build_function_type_list (V2DF_type_node,
18738 V2DF_type_node, integer_type_node, NULL_TREE);
18739 tree v2df_ftype_v2df_int64
18740 = build_function_type_list (V2DF_type_node,
18741 V2DF_type_node, long_long_integer_type_node,
18743 tree v4sf_ftype_v4sf_v2df
18744 = build_function_type_list (V4SF_type_node,
18745 V4SF_type_node, V2DF_type_node, NULL_TREE);
18746 tree v2df_ftype_v2df_v4sf
18747 = build_function_type_list (V2DF_type_node,
18748 V2DF_type_node, V4SF_type_node, NULL_TREE);
18749 tree v2df_ftype_v2df_v2df_int
18750 = build_function_type_list (V2DF_type_node,
18751 V2DF_type_node, V2DF_type_node,
18754 tree v2df_ftype_v2df_pcdouble
18755 = build_function_type_list (V2DF_type_node,
18756 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18757 tree void_ftype_pdouble_v2df
18758 = build_function_type_list (void_type_node,
18759 pdouble_type_node, V2DF_type_node, NULL_TREE);
18760 tree void_ftype_pint_int
18761 = build_function_type_list (void_type_node,
18762 pint_type_node, integer_type_node, NULL_TREE);
18763 tree void_ftype_v16qi_v16qi_pchar
18764 = build_function_type_list (void_type_node,
18765 V16QI_type_node, V16QI_type_node,
18766 pchar_type_node, NULL_TREE);
18767 tree v2df_ftype_pcdouble
18768 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18769 tree v2df_ftype_v2df_v2df
18770 = build_function_type_list (V2DF_type_node,
18771 V2DF_type_node, V2DF_type_node, NULL_TREE);
18772 tree v16qi_ftype_v16qi_v16qi
18773 = build_function_type_list (V16QI_type_node,
18774 V16QI_type_node, V16QI_type_node, NULL_TREE);
18775 tree v8hi_ftype_v8hi_v8hi
18776 = build_function_type_list (V8HI_type_node,
18777 V8HI_type_node, V8HI_type_node, NULL_TREE);
18778 tree v4si_ftype_v4si_v4si
18779 = build_function_type_list (V4SI_type_node,
18780 V4SI_type_node, V4SI_type_node, NULL_TREE);
18781 tree v2di_ftype_v2di_v2di
18782 = build_function_type_list (V2DI_type_node,
18783 V2DI_type_node, V2DI_type_node, NULL_TREE);
18784 tree v2di_ftype_v2df_v2df
18785 = build_function_type_list (V2DI_type_node,
18786 V2DF_type_node, V2DF_type_node, NULL_TREE);
18787 tree v2df_ftype_v2df
18788 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18789 tree v2di_ftype_v2di_int
18790 = build_function_type_list (V2DI_type_node,
18791 V2DI_type_node, integer_type_node, NULL_TREE);
18792 tree v2di_ftype_v2di_v2di_int
18793 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18794 V2DI_type_node, integer_type_node, NULL_TREE);
18795 tree v4si_ftype_v4si_int
18796 = build_function_type_list (V4SI_type_node,
18797 V4SI_type_node, integer_type_node, NULL_TREE);
18798 tree v8hi_ftype_v8hi_int
18799 = build_function_type_list (V8HI_type_node,
18800 V8HI_type_node, integer_type_node, NULL_TREE);
18801 tree v4si_ftype_v8hi_v8hi
18802 = build_function_type_list (V4SI_type_node,
18803 V8HI_type_node, V8HI_type_node, NULL_TREE);
18804 tree di_ftype_v8qi_v8qi
18805 = build_function_type_list (long_long_unsigned_type_node,
18806 V8QI_type_node, V8QI_type_node, NULL_TREE);
18807 tree di_ftype_v2si_v2si
18808 = build_function_type_list (long_long_unsigned_type_node,
18809 V2SI_type_node, V2SI_type_node, NULL_TREE);
18810 tree v2di_ftype_v16qi_v16qi
18811 = build_function_type_list (V2DI_type_node,
18812 V16QI_type_node, V16QI_type_node, NULL_TREE);
18813 tree v2di_ftype_v4si_v4si
18814 = build_function_type_list (V2DI_type_node,
18815 V4SI_type_node, V4SI_type_node, NULL_TREE);
18816 tree int_ftype_v16qi
18817 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18818 tree v16qi_ftype_pcchar
18819 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18820 tree void_ftype_pchar_v16qi
18821 = build_function_type_list (void_type_node,
18822 pchar_type_node, V16QI_type_node, NULL_TREE);
18824 tree v2di_ftype_v2di_unsigned_unsigned
18825 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18826 unsigned_type_node, unsigned_type_node,
18828 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18829 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18830 unsigned_type_node, unsigned_type_node,
18832 tree v2di_ftype_v2di_v16qi
18833 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18835 tree v2df_ftype_v2df_v2df_v2df
18836 = build_function_type_list (V2DF_type_node,
18837 V2DF_type_node, V2DF_type_node,
18838 V2DF_type_node, NULL_TREE);
18839 tree v4sf_ftype_v4sf_v4sf_v4sf
18840 = build_function_type_list (V4SF_type_node,
18841 V4SF_type_node, V4SF_type_node,
18842 V4SF_type_node, NULL_TREE);
18843 tree v8hi_ftype_v16qi
18844 = build_function_type_list (V8HI_type_node, V16QI_type_node,
18846 tree v4si_ftype_v16qi
18847 = build_function_type_list (V4SI_type_node, V16QI_type_node,
18849 tree v2di_ftype_v16qi
18850 = build_function_type_list (V2DI_type_node, V16QI_type_node,
18852 tree v4si_ftype_v8hi
18853 = build_function_type_list (V4SI_type_node, V8HI_type_node,
18855 tree v2di_ftype_v8hi
18856 = build_function_type_list (V2DI_type_node, V8HI_type_node,
18858 tree v2di_ftype_v4si
18859 = build_function_type_list (V2DI_type_node, V4SI_type_node,
18861 tree v2di_ftype_pv2di
18862 = build_function_type_list (V2DI_type_node, pv2di_type_node,
18864 tree v16qi_ftype_v16qi_v16qi_int
18865 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18866 V16QI_type_node, integer_type_node,
18868 tree v16qi_ftype_v16qi_v16qi_v16qi
18869 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18870 V16QI_type_node, V16QI_type_node,
18872 tree v8hi_ftype_v8hi_v8hi_int
18873 = build_function_type_list (V8HI_type_node, V8HI_type_node,
18874 V8HI_type_node, integer_type_node,
18876 tree v4si_ftype_v4si_v4si_int
18877 = build_function_type_list (V4SI_type_node, V4SI_type_node,
18878 V4SI_type_node, integer_type_node,
18880 tree int_ftype_v2di_v2di
18881 = build_function_type_list (integer_type_node,
18882 V2DI_type_node, V2DI_type_node,
18884 tree int_ftype_v16qi_int_v16qi_int_int
18885 = build_function_type_list (integer_type_node,
18892 tree v16qi_ftype_v16qi_int_v16qi_int_int
18893 = build_function_type_list (V16QI_type_node,
18900 tree int_ftype_v16qi_v16qi_int
18901 = build_function_type_list (integer_type_node,
18907 /* SSE5 instructions */
18908 tree v2di_ftype_v2di_v2di_v2di
18909 = build_function_type_list (V2DI_type_node,
18915 tree v4si_ftype_v4si_v4si_v4si
18916 = build_function_type_list (V4SI_type_node,
18922 tree v4si_ftype_v4si_v4si_v2di
18923 = build_function_type_list (V4SI_type_node,
18929 tree v8hi_ftype_v8hi_v8hi_v8hi
18930 = build_function_type_list (V8HI_type_node,
18936 tree v8hi_ftype_v8hi_v8hi_v4si
18937 = build_function_type_list (V8HI_type_node,
18943 tree v2df_ftype_v2df_v2df_v16qi
18944 = build_function_type_list (V2DF_type_node,
18950 tree v4sf_ftype_v4sf_v4sf_v16qi
18951 = build_function_type_list (V4SF_type_node,
18957 tree v2di_ftype_v2di_si
18958 = build_function_type_list (V2DI_type_node,
18963 tree v4si_ftype_v4si_si
18964 = build_function_type_list (V4SI_type_node,
18969 tree v8hi_ftype_v8hi_si
18970 = build_function_type_list (V8HI_type_node,
18975 tree v16qi_ftype_v16qi_si
18976 = build_function_type_list (V16QI_type_node,
18980 tree v4sf_ftype_v4hi
18981 = build_function_type_list (V4SF_type_node,
18985 tree v4hi_ftype_v4sf
18986 = build_function_type_list (V4HI_type_node,
18990 tree v2di_ftype_v2di
18991 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18995 /* The __float80 type. */
18996 if (TYPE_MODE (long_double_type_node) == XFmode)
18997 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19001 /* The __float80 type. */
19002 tree float80_type_node = make_node (REAL_TYPE);
19004 TYPE_PRECISION (float80_type_node) = 80;
19005 layout_type (float80_type_node);
19006 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19012 tree float128_type_node = make_node (REAL_TYPE);
19014 TYPE_PRECISION (float128_type_node) = 128;
19015 layout_type (float128_type_node);
19016 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19019 /* TFmode support builtins. */
19020 ftype = build_function_type (float128_type_node,
19022 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19024 ftype = build_function_type_list (float128_type_node,
19025 float128_type_node,
19027 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19029 ftype = build_function_type_list (float128_type_node,
19030 float128_type_node,
19031 float128_type_node,
19033 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19036 /* Add all SSE builtins that are more or less simple operations on
19038 for (i = 0, d = bdesc_sse_3arg;
19039 i < ARRAY_SIZE (bdesc_sse_3arg);
19042 /* Use one of the operands; the target can have a different mode for
19043 mask-generating compares. */
19044 enum machine_mode mode;
19049 mode = insn_data[d->icode].operand[1].mode;
19054 type = v16qi_ftype_v16qi_v16qi_int;
19057 type = v8hi_ftype_v8hi_v8hi_int;
19060 type = v4si_ftype_v4si_v4si_int;
19063 type = v2di_ftype_v2di_v2di_int;
19066 type = v2df_ftype_v2df_v2df_int;
19069 type = v4sf_ftype_v4sf_v4sf_int;
19072 gcc_unreachable ();
19075 /* Override for variable blends. */
19078 case CODE_FOR_sse4_1_blendvpd:
19079 type = v2df_ftype_v2df_v2df_v2df;
19081 case CODE_FOR_sse4_1_blendvps:
19082 type = v4sf_ftype_v4sf_v4sf_v4sf;
19084 case CODE_FOR_sse4_1_pblendvb:
19085 type = v16qi_ftype_v16qi_v16qi_v16qi;
19091 def_builtin_const (d->mask, d->name, type, d->code);
19094 /* Add all builtins that are more or less simple operations on two
19096 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19098 /* Use one of the operands; the target can have a different mode for
19099 mask-generating compares. */
19100 enum machine_mode mode;
19105 mode = insn_data[d->icode].operand[1].mode;
19110 type = v16qi_ftype_v16qi_v16qi;
19113 type = v8hi_ftype_v8hi_v8hi;
19116 type = v4si_ftype_v4si_v4si;
19119 type = v2di_ftype_v2di_v2di;
19122 type = v2df_ftype_v2df_v2df;
19125 type = v4sf_ftype_v4sf_v4sf;
19128 type = v8qi_ftype_v8qi_v8qi;
19131 type = v4hi_ftype_v4hi_v4hi;
19134 type = v2si_ftype_v2si_v2si;
19137 type = di_ftype_di_di;
19141 gcc_unreachable ();
19144 /* Override for comparisons. */
19145 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19146 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19147 type = v4si_ftype_v4sf_v4sf;
19149 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19150 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19151 type = v2di_ftype_v2df_v2df;
19153 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19154 type = v4si_ftype_v2df_v2df;
19156 def_builtin_const (d->mask, d->name, type, d->code);
19159 /* Add all builtins that are more or less simple operations on 1 operand. */
19160 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19162 enum machine_mode mode;
19167 mode = insn_data[d->icode].operand[1].mode;
19172 type = v16qi_ftype_v16qi;
19175 type = v8hi_ftype_v8hi;
19178 type = v4si_ftype_v4si;
19181 type = v2df_ftype_v2df;
19184 type = v4sf_ftype_v4sf;
19187 type = v8qi_ftype_v8qi;
19190 type = v4hi_ftype_v4hi;
19193 type = v2si_ftype_v2si;
19200 def_builtin_const (d->mask, d->name, type, d->code);
19203 /* pcmpestr[im] insns. */
19204 for (i = 0, d = bdesc_pcmpestr;
19205 i < ARRAY_SIZE (bdesc_pcmpestr);
19208 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19209 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19211 ftype = int_ftype_v16qi_int_v16qi_int_int;
19212 def_builtin_const (d->mask, d->name, ftype, d->code);
19215 /* pcmpistr[im] insns. */
19216 for (i = 0, d = bdesc_pcmpistr;
19217 i < ARRAY_SIZE (bdesc_pcmpistr);
19220 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19221 ftype = v16qi_ftype_v16qi_v16qi_int;
19223 ftype = int_ftype_v16qi_v16qi_int;
19224 def_builtin_const (d->mask, d->name, ftype, d->code);
19227 /* Add the remaining MMX insns with somewhat more complicated types. */
19228 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19229 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
19230 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
19231 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
19233 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
19234 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
19235 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
19237 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
19238 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
19240 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19241 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19243 /* comi/ucomi insns. */
19244 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19245 if (d->mask == OPTION_MASK_ISA_SSE2)
19246 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19248 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19251 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19252 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19254 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19255 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19256 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19258 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19259 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19260 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19261 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19262 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19263 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19264 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19265 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19266 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19267 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19268 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19270 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19272 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19273 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19275 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19276 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19277 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19278 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19280 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19281 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19282 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19283 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19285 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19287 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19289 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19290 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19291 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19292 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR);
19293 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19294 ftype = build_function_type_list (float_type_node,
19297 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19298 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19299 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR);
19300 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19302 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
19304 /* Original 3DNow! */
19305 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19306 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19307 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19308 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19309 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19310 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19311 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19312 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19313 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19314 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19315 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19316 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19317 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19318 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19319 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19320 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19321 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19322 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19323 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19324 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19326 /* 3DNow! extension as used in the Athlon CPU. */
19327 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19328 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19329 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19330 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19331 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19332 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19335 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19337 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19338 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19340 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19341 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19343 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19344 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19345 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19346 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19347 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19349 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19350 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19351 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19352 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19354 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19355 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19357 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
19359 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19360 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19362 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19363 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19364 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19365 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19366 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19368 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19370 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19371 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19372 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19373 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19375 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19376 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19377 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19379 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19380 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19381 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19382 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19384 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19385 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19386 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19388 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19389 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19391 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19392 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19394 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19395 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19396 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19397 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19398 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19399 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19400 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19402 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19403 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19404 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19405 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19406 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19407 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19408 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19410 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19411 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19412 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19413 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19415 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19417 /* Prescott New Instructions. */
19418 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19419 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19420 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
19423 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19424 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19427 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19428 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19429 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19430 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19431 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19432 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19433 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19434 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19435 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19436 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19437 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19438 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19439 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19440 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19442 /* SSE4.1 and SSE5 */
19443 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19444 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19445 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
19446 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
19449 ftype = build_function_type_list (unsigned_type_node,
19450 unsigned_type_node,
19451 unsigned_char_type_node,
19453 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19454 ftype = build_function_type_list (unsigned_type_node,
19455 unsigned_type_node,
19456 short_unsigned_type_node,
19458 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19459 ftype = build_function_type_list (unsigned_type_node,
19460 unsigned_type_node,
19461 unsigned_type_node,
19463 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19464 ftype = build_function_type_list (long_long_unsigned_type_node,
19465 long_long_unsigned_type_node,
19466 long_long_unsigned_type_node,
19468 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
19470 /* AMDFAM10 SSE4A New built-ins */
19471 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19472 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19473 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19474 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19475 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19476 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19478 /* Access to the vec_init patterns. */
19479 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19480 integer_type_node, NULL_TREE);
19481 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19483 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19484 short_integer_type_node,
19485 short_integer_type_node,
19486 short_integer_type_node, NULL_TREE);
19487 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19489 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19490 char_type_node, char_type_node,
19491 char_type_node, char_type_node,
19492 char_type_node, char_type_node,
19493 char_type_node, NULL_TREE);
19494 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19496 /* Access to the vec_extract patterns. */
19497 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19498 integer_type_node, NULL_TREE);
19499 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19501 ftype = build_function_type_list (long_long_integer_type_node,
19502 V2DI_type_node, integer_type_node,
19504 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19506 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19507 integer_type_node, NULL_TREE);
19508 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19510 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19511 integer_type_node, NULL_TREE);
19512 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19514 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19515 integer_type_node, NULL_TREE);
19516 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19518 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19519 integer_type_node, NULL_TREE);
19520 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19522 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19523 integer_type_node, NULL_TREE);
19524 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19526 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19527 integer_type_node, NULL_TREE);
19528 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
19530 /* Access to the vec_set patterns. */
19531 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19533 integer_type_node, NULL_TREE);
19534 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19536 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19538 integer_type_node, NULL_TREE);
19539 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19541 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19543 integer_type_node, NULL_TREE);
19544 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19546 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19548 integer_type_node, NULL_TREE);
19549 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19551 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19553 integer_type_node, NULL_TREE);
19554 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19556 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19558 integer_type_node, NULL_TREE);
19559 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
19561 /* Add SSE5 multi-arg argument instructions */
19562 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19564 tree mtype = NULL_TREE;
19569 switch ((enum multi_arg_type)d->flag)
19571 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19572 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19573 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19574 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19575 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19576 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19577 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19578 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19579 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19580 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19581 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19582 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19583 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19584 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19585 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19586 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19587 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19588 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19589 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19590 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19591 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19592 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19593 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19594 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19595 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19596 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19597 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19598 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19599 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19600 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19601 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19602 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19603 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19604 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19605 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19606 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19607 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19608 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19609 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19610 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19611 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19612 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19613 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19614 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19615 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19616 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19617 case MULTI_ARG_UNKNOWN:
19619 gcc_unreachable ();
19623 def_builtin_const (d->mask, d->name, mtype, d->code);
19628 ix86_init_builtins (void)
19631 ix86_init_mmx_sse_builtins ();
19634 /* Errors in the source file can cause expand_expr to return const0_rtx
19635 where we expect a vector. To avoid crashing, use one of the vector
19636 clear instructions. */
19638 safe_vector_operand (rtx x, enum machine_mode mode)
19640 if (x == const0_rtx)
19641 x = CONST0_RTX (mode);
19645 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19646 4 operands. The third argument must be a constant smaller than 8
19650 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
19654 tree arg0 = CALL_EXPR_ARG (exp, 0);
19655 tree arg1 = CALL_EXPR_ARG (exp, 1);
19656 tree arg2 = CALL_EXPR_ARG (exp, 2);
19657 rtx op0 = expand_normal (arg0);
19658 rtx op1 = expand_normal (arg1);
19659 rtx op2 = expand_normal (arg2);
19660 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19661 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19662 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19663 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
19665 if (VECTOR_MODE_P (mode1))
19666 op0 = safe_vector_operand (op0, mode1);
19667 if (VECTOR_MODE_P (mode2))
19668 op1 = safe_vector_operand (op1, mode2);
19669 if (VECTOR_MODE_P (mode3))
19670 op2 = safe_vector_operand (op2, mode3);
19674 || GET_MODE (target) != tmode
19675 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19676 target = gen_reg_rtx (tmode);
19678 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19679 op0 = copy_to_mode_reg (mode1, op0);
19680 if ((optimize && !register_operand (op1, mode2))
19681 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19682 op1 = copy_to_mode_reg (mode2, op1);
19684 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19687 case CODE_FOR_sse4_1_blendvpd:
19688 case CODE_FOR_sse4_1_blendvps:
19689 case CODE_FOR_sse4_1_pblendvb:
19690 op2 = copy_to_mode_reg (mode3, op2);
19693 case CODE_FOR_sse4_1_roundsd:
19694 case CODE_FOR_sse4_1_roundss:
19695 error ("the third argument must be a 4-bit immediate");
19699 error ("the third argument must be an 8-bit immediate");
19703 pat = GEN_FCN (icode) (target, op0, op1, op2);
19710 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19713 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19716 tree arg0 = CALL_EXPR_ARG (exp, 0);
19717 tree arg1 = CALL_EXPR_ARG (exp, 1);
19718 rtx op0 = expand_normal (arg0);
19719 rtx op1 = expand_normal (arg1);
19720 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19721 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19722 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19726 || GET_MODE (target) != tmode
19727 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19728 target = gen_reg_rtx (tmode);
19730 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19731 op0 = copy_to_mode_reg (mode0, op0);
19732 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19734 op1 = copy_to_reg (op1);
19735 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19738 pat = GEN_FCN (icode) (target, op0, op1);
19745 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
19748 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19751 tree arg0 = CALL_EXPR_ARG (exp, 0);
19752 tree arg1 = CALL_EXPR_ARG (exp, 1);
19753 rtx op0 = expand_normal (arg0);
19754 rtx op1 = expand_normal (arg1);
19755 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19756 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19757 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19759 if (VECTOR_MODE_P (mode0))
19760 op0 = safe_vector_operand (op0, mode0);
19761 if (VECTOR_MODE_P (mode1))
19762 op1 = safe_vector_operand (op1, mode1);
19764 if (optimize || !target
19765 || GET_MODE (target) != tmode
19766 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19767 target = gen_reg_rtx (tmode);
19769 if (GET_MODE (op1) == SImode && mode1 == TImode)
19771 rtx x = gen_reg_rtx (V4SImode);
19772 emit_insn (gen_sse2_loadd (x, op1));
19773 op1 = gen_lowpart (TImode, x);
19776 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19777 op0 = copy_to_mode_reg (mode0, op0);
19778 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19779 op1 = copy_to_mode_reg (mode1, op1);
19781 /* ??? Using ix86_fixup_binary_operands is problematic when
19782 we've got mismatched modes. Fake it. */
19788 if (tmode == mode0 && tmode == mode1)
19790 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
19794 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
19796 op0 = force_reg (mode0, op0);
19797 op1 = force_reg (mode1, op1);
19798 target = gen_reg_rtx (tmode);
19801 pat = GEN_FCN (icode) (target, op0, op1);
19808 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
19811 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
19812 enum multi_arg_type m_type,
19813 enum insn_code sub_code)
19818 bool comparison_p = false;
19820 bool last_arg_constant = false;
19821 int num_memory = 0;
19824 enum machine_mode mode;
19827 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19831 case MULTI_ARG_3_SF:
19832 case MULTI_ARG_3_DF:
19833 case MULTI_ARG_3_DI:
19834 case MULTI_ARG_3_SI:
19835 case MULTI_ARG_3_SI_DI:
19836 case MULTI_ARG_3_HI:
19837 case MULTI_ARG_3_HI_SI:
19838 case MULTI_ARG_3_QI:
19839 case MULTI_ARG_3_PERMPS:
19840 case MULTI_ARG_3_PERMPD:
19844 case MULTI_ARG_2_SF:
19845 case MULTI_ARG_2_DF:
19846 case MULTI_ARG_2_DI:
19847 case MULTI_ARG_2_SI:
19848 case MULTI_ARG_2_HI:
19849 case MULTI_ARG_2_QI:
19853 case MULTI_ARG_2_DI_IMM:
19854 case MULTI_ARG_2_SI_IMM:
19855 case MULTI_ARG_2_HI_IMM:
19856 case MULTI_ARG_2_QI_IMM:
19858 last_arg_constant = true;
19861 case MULTI_ARG_1_SF:
19862 case MULTI_ARG_1_DF:
19863 case MULTI_ARG_1_DI:
19864 case MULTI_ARG_1_SI:
19865 case MULTI_ARG_1_HI:
19866 case MULTI_ARG_1_QI:
19867 case MULTI_ARG_1_SI_DI:
19868 case MULTI_ARG_1_HI_DI:
19869 case MULTI_ARG_1_HI_SI:
19870 case MULTI_ARG_1_QI_DI:
19871 case MULTI_ARG_1_QI_SI:
19872 case MULTI_ARG_1_QI_HI:
19873 case MULTI_ARG_1_PH2PS:
19874 case MULTI_ARG_1_PS2PH:
19878 case MULTI_ARG_2_SF_CMP:
19879 case MULTI_ARG_2_DF_CMP:
19880 case MULTI_ARG_2_DI_CMP:
19881 case MULTI_ARG_2_SI_CMP:
19882 case MULTI_ARG_2_HI_CMP:
19883 case MULTI_ARG_2_QI_CMP:
19885 comparison_p = true;
19888 case MULTI_ARG_2_SF_TF:
19889 case MULTI_ARG_2_DF_TF:
19890 case MULTI_ARG_2_DI_TF:
19891 case MULTI_ARG_2_SI_TF:
19892 case MULTI_ARG_2_HI_TF:
19893 case MULTI_ARG_2_QI_TF:
19898 case MULTI_ARG_UNKNOWN:
19900 gcc_unreachable ();
19903 if (optimize || !target
19904 || GET_MODE (target) != tmode
19905 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19906 target = gen_reg_rtx (tmode);
19908 gcc_assert (nargs <= 4);
19910 for (i = 0; i < nargs; i++)
19912 tree arg = CALL_EXPR_ARG (exp, i);
19913 rtx op = expand_normal (arg);
19914 int adjust = (comparison_p) ? 1 : 0;
19915 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
19917 if (last_arg_constant && i == nargs-1)
19919 if (GET_CODE (op) != CONST_INT)
19921 error ("last argument must be an immediate");
19922 return gen_reg_rtx (tmode);
19927 if (VECTOR_MODE_P (mode))
19928 op = safe_vector_operand (op, mode);
19930 /* If we aren't optimizing, only allow one memory operand to be
19932 if (memory_operand (op, mode))
19935 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
19938 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
19940 op = force_reg (mode, op);
19944 args[i].mode = mode;
19950 pat = GEN_FCN (icode) (target, args[0].op);
19955 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19956 GEN_INT ((int)sub_code));
19957 else if (! comparison_p)
19958 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19961 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
19965 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
19970 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
19974 gcc_unreachable ();
19984 /* Subroutine of ix86_expand_builtin to take care of stores. */
19987 ix86_expand_store_builtin (enum insn_code icode, tree exp)
19990 tree arg0 = CALL_EXPR_ARG (exp, 0);
19991 tree arg1 = CALL_EXPR_ARG (exp, 1);
19992 rtx op0 = expand_normal (arg0);
19993 rtx op1 = expand_normal (arg1);
19994 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
19995 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19997 if (VECTOR_MODE_P (mode1))
19998 op1 = safe_vector_operand (op1, mode1);
20000 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20001 op1 = copy_to_mode_reg (mode1, op1);
20003 pat = GEN_FCN (icode) (op0, op1);
20009 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
20012 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
20013 rtx target, int do_load)
20016 tree arg0 = CALL_EXPR_ARG (exp, 0);
20017 rtx op0 = expand_normal (arg0);
20018 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20019 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20021 if (optimize || !target
20022 || GET_MODE (target) != tmode
20023 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20024 target = gen_reg_rtx (tmode);
20026 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20029 if (VECTOR_MODE_P (mode0))
20030 op0 = safe_vector_operand (op0, mode0);
20032 if ((optimize && !register_operand (op0, mode0))
20033 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20034 op0 = copy_to_mode_reg (mode0, op0);
20039 case CODE_FOR_sse4_1_roundpd:
20040 case CODE_FOR_sse4_1_roundps:
20042 tree arg1 = CALL_EXPR_ARG (exp, 1);
20043 rtx op1 = expand_normal (arg1);
20044 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20046 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20048 error ("the second argument must be a 4-bit immediate");
20051 pat = GEN_FCN (icode) (target, op0, op1);
20055 pat = GEN_FCN (icode) (target, op0);
20065 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20066 sqrtss, rsqrtss, rcpss. */
20069 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
20072 tree arg0 = CALL_EXPR_ARG (exp, 0);
20073 rtx op1, op0 = expand_normal (arg0);
20074 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20075 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20077 if (optimize || !target
20078 || GET_MODE (target) != tmode
20079 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20080 target = gen_reg_rtx (tmode);
20082 if (VECTOR_MODE_P (mode0))
20083 op0 = safe_vector_operand (op0, mode0);
20085 if ((optimize && !register_operand (op0, mode0))
20086 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20087 op0 = copy_to_mode_reg (mode0, op0);
20090 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20091 op1 = copy_to_mode_reg (mode0, op1);
20093 pat = GEN_FCN (icode) (target, op0, op1);
20100 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20103 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
20107 tree arg0 = CALL_EXPR_ARG (exp, 0);
20108 tree arg1 = CALL_EXPR_ARG (exp, 1);
20109 rtx op0 = expand_normal (arg0);
20110 rtx op1 = expand_normal (arg1);
20112 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20113 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20114 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20115 enum rtx_code comparison = d->comparison;
20117 if (VECTOR_MODE_P (mode0))
20118 op0 = safe_vector_operand (op0, mode0);
20119 if (VECTOR_MODE_P (mode1))
20120 op1 = safe_vector_operand (op1, mode1);
20122 /* Swap operands if we have a comparison that isn't available in
20124 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20126 rtx tmp = gen_reg_rtx (mode1);
20127 emit_move_insn (tmp, op1);
20132 if (optimize || !target
20133 || GET_MODE (target) != tmode
20134 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20135 target = gen_reg_rtx (tmode);
20137 if ((optimize && !register_operand (op0, mode0))
20138 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20139 op0 = copy_to_mode_reg (mode0, op0);
20140 if ((optimize && !register_operand (op1, mode1))
20141 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20142 op1 = copy_to_mode_reg (mode1, op1);
20144 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20145 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20152 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20155 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20159 tree arg0 = CALL_EXPR_ARG (exp, 0);
20160 tree arg1 = CALL_EXPR_ARG (exp, 1);
20161 rtx op0 = expand_normal (arg0);
20162 rtx op1 = expand_normal (arg1);
20163 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20164 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20165 enum rtx_code comparison = d->comparison;
20167 if (VECTOR_MODE_P (mode0))
20168 op0 = safe_vector_operand (op0, mode0);
20169 if (VECTOR_MODE_P (mode1))
20170 op1 = safe_vector_operand (op1, mode1);
20172 /* Swap operands if we have a comparison that isn't available in
20174 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20181 target = gen_reg_rtx (SImode);
20182 emit_move_insn (target, const0_rtx);
20183 target = gen_rtx_SUBREG (QImode, target, 0);
20185 if ((optimize && !register_operand (op0, mode0))
20186 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20187 op0 = copy_to_mode_reg (mode0, op0);
20188 if ((optimize && !register_operand (op1, mode1))
20189 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20190 op1 = copy_to_mode_reg (mode1, op1);
20192 pat = GEN_FCN (d->icode) (op0, op1);
20196 emit_insn (gen_rtx_SET (VOIDmode,
20197 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20198 gen_rtx_fmt_ee (comparison, QImode,
20202 return SUBREG_REG (target);
20205 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20208 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20212 tree arg0 = CALL_EXPR_ARG (exp, 0);
20213 tree arg1 = CALL_EXPR_ARG (exp, 1);
20214 rtx op0 = expand_normal (arg0);
20215 rtx op1 = expand_normal (arg1);
20216 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20217 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20218 enum rtx_code comparison = d->comparison;
20220 if (VECTOR_MODE_P (mode0))
20221 op0 = safe_vector_operand (op0, mode0);
20222 if (VECTOR_MODE_P (mode1))
20223 op1 = safe_vector_operand (op1, mode1);
20225 target = gen_reg_rtx (SImode);
20226 emit_move_insn (target, const0_rtx);
20227 target = gen_rtx_SUBREG (QImode, target, 0);
20229 if ((optimize && !register_operand (op0, mode0))
20230 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20231 op0 = copy_to_mode_reg (mode0, op0);
20232 if ((optimize && !register_operand (op1, mode1))
20233 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20234 op1 = copy_to_mode_reg (mode1, op1);
20236 pat = GEN_FCN (d->icode) (op0, op1);
20240 emit_insn (gen_rtx_SET (VOIDmode,
20241 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20242 gen_rtx_fmt_ee (comparison, QImode,
20246 return SUBREG_REG (target);
20249 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20252 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20253 tree exp, rtx target)
20256 tree arg0 = CALL_EXPR_ARG (exp, 0);
20257 tree arg1 = CALL_EXPR_ARG (exp, 1);
20258 tree arg2 = CALL_EXPR_ARG (exp, 2);
20259 tree arg3 = CALL_EXPR_ARG (exp, 3);
20260 tree arg4 = CALL_EXPR_ARG (exp, 4);
20261 rtx scratch0, scratch1;
20262 rtx op0 = expand_normal (arg0);
20263 rtx op1 = expand_normal (arg1);
20264 rtx op2 = expand_normal (arg2);
20265 rtx op3 = expand_normal (arg3);
20266 rtx op4 = expand_normal (arg4);
20267 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20269 tmode0 = insn_data[d->icode].operand[0].mode;
20270 tmode1 = insn_data[d->icode].operand[1].mode;
20271 modev2 = insn_data[d->icode].operand[2].mode;
20272 modei3 = insn_data[d->icode].operand[3].mode;
20273 modev4 = insn_data[d->icode].operand[4].mode;
20274 modei5 = insn_data[d->icode].operand[5].mode;
20275 modeimm = insn_data[d->icode].operand[6].mode;
20277 if (VECTOR_MODE_P (modev2))
20278 op0 = safe_vector_operand (op0, modev2);
20279 if (VECTOR_MODE_P (modev4))
20280 op2 = safe_vector_operand (op2, modev4);
20282 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20283 op0 = copy_to_mode_reg (modev2, op0);
20284 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20285 op1 = copy_to_mode_reg (modei3, op1);
20286 if ((optimize && !register_operand (op2, modev4))
20287 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20288 op2 = copy_to_mode_reg (modev4, op2);
20289 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20290 op3 = copy_to_mode_reg (modei5, op3);
20292 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20294 error ("the fifth argument must be a 8-bit immediate");
20298 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20300 if (optimize || !target
20301 || GET_MODE (target) != tmode0
20302 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20303 target = gen_reg_rtx (tmode0);
20305 scratch1 = gen_reg_rtx (tmode1);
20307 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20309 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20311 if (optimize || !target
20312 || GET_MODE (target) != tmode1
20313 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20314 target = gen_reg_rtx (tmode1);
20316 scratch0 = gen_reg_rtx (tmode0);
20318 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20322 gcc_assert (d->flag);
20324 scratch0 = gen_reg_rtx (tmode0);
20325 scratch1 = gen_reg_rtx (tmode1);
20327 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20337 target = gen_reg_rtx (SImode);
20338 emit_move_insn (target, const0_rtx);
20339 target = gen_rtx_SUBREG (QImode, target, 0);
20342 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20343 gen_rtx_fmt_ee (EQ, QImode,
20344 gen_rtx_REG ((enum machine_mode) d->flag,
20347 return SUBREG_REG (target);
20354 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20357 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20358 tree exp, rtx target)
20361 tree arg0 = CALL_EXPR_ARG (exp, 0);
20362 tree arg1 = CALL_EXPR_ARG (exp, 1);
20363 tree arg2 = CALL_EXPR_ARG (exp, 2);
20364 rtx scratch0, scratch1;
20365 rtx op0 = expand_normal (arg0);
20366 rtx op1 = expand_normal (arg1);
20367 rtx op2 = expand_normal (arg2);
20368 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20370 tmode0 = insn_data[d->icode].operand[0].mode;
20371 tmode1 = insn_data[d->icode].operand[1].mode;
20372 modev2 = insn_data[d->icode].operand[2].mode;
20373 modev3 = insn_data[d->icode].operand[3].mode;
20374 modeimm = insn_data[d->icode].operand[4].mode;
20376 if (VECTOR_MODE_P (modev2))
20377 op0 = safe_vector_operand (op0, modev2);
20378 if (VECTOR_MODE_P (modev3))
20379 op1 = safe_vector_operand (op1, modev3);
20381 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20382 op0 = copy_to_mode_reg (modev2, op0);
20383 if ((optimize && !register_operand (op1, modev3))
20384 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20385 op1 = copy_to_mode_reg (modev3, op1);
20387 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20389 error ("the third argument must be a 8-bit immediate");
20393 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20395 if (optimize || !target
20396 || GET_MODE (target) != tmode0
20397 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20398 target = gen_reg_rtx (tmode0);
20400 scratch1 = gen_reg_rtx (tmode1);
20402 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20404 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20406 if (optimize || !target
20407 || GET_MODE (target) != tmode1
20408 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20409 target = gen_reg_rtx (tmode1);
20411 scratch0 = gen_reg_rtx (tmode0);
20413 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20417 gcc_assert (d->flag);
20419 scratch0 = gen_reg_rtx (tmode0);
20420 scratch1 = gen_reg_rtx (tmode1);
20422 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20432 target = gen_reg_rtx (SImode);
20433 emit_move_insn (target, const0_rtx);
20434 target = gen_rtx_SUBREG (QImode, target, 0);
20437 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20438 gen_rtx_fmt_ee (EQ, QImode,
20439 gen_rtx_REG ((enum machine_mode) d->flag,
20442 return SUBREG_REG (target);
20448 /* Return the integer constant in ARG. Constrain it to be in the range
20449 of the subparts of VEC_TYPE; issue an error if not. */
20452 get_element_number (tree vec_type, tree arg)
20454 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20456 if (!host_integerp (arg, 1)
20457 || (elt = tree_low_cst (arg, 1), elt > max))
20459 error ("selector must be an integer constant in the range 0..%wi", max);
20466 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20467 ix86_expand_vector_init. We DO have language-level syntax for this, in
20468 the form of (type){ init-list }. Except that since we can't place emms
20469 instructions from inside the compiler, we can't allow the use of MMX
20470 registers unless the user explicitly asks for it. So we do *not* define
20471 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20472 we have builtins invoked by mmintrin.h that gives us license to emit
20473 these sorts of instructions. */
20476 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20478 enum machine_mode tmode = TYPE_MODE (type);
20479 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20480 int i, n_elt = GET_MODE_NUNITS (tmode);
20481 rtvec v = rtvec_alloc (n_elt);
20483 gcc_assert (VECTOR_MODE_P (tmode));
20484 gcc_assert (call_expr_nargs (exp) == n_elt);
20486 for (i = 0; i < n_elt; ++i)
20488 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20489 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20492 if (!target || !register_operand (target, tmode))
20493 target = gen_reg_rtx (tmode);
20495 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20499 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20500 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20501 had a language-level syntax for referencing vector elements. */
20504 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20506 enum machine_mode tmode, mode0;
20511 arg0 = CALL_EXPR_ARG (exp, 0);
20512 arg1 = CALL_EXPR_ARG (exp, 1);
20514 op0 = expand_normal (arg0);
20515 elt = get_element_number (TREE_TYPE (arg0), arg1);
20517 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20518 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20519 gcc_assert (VECTOR_MODE_P (mode0));
20521 op0 = force_reg (mode0, op0);
20523 if (optimize || !target || !register_operand (target, tmode))
20524 target = gen_reg_rtx (tmode);
20526 ix86_expand_vector_extract (true, target, op0, elt);
20531 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20532 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20533 a language-level syntax for referencing vector elements. */
20536 ix86_expand_vec_set_builtin (tree exp)
20538 enum machine_mode tmode, mode1;
20539 tree arg0, arg1, arg2;
20541 rtx op0, op1, target;
20543 arg0 = CALL_EXPR_ARG (exp, 0);
20544 arg1 = CALL_EXPR_ARG (exp, 1);
20545 arg2 = CALL_EXPR_ARG (exp, 2);
20547 tmode = TYPE_MODE (TREE_TYPE (arg0));
20548 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20549 gcc_assert (VECTOR_MODE_P (tmode));
20551 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20552 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
20553 elt = get_element_number (TREE_TYPE (arg0), arg2);
20555 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20556 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20558 op0 = force_reg (tmode, op0);
20559 op1 = force_reg (mode1, op1);
20561 /* OP0 is the source of these builtin functions and shouldn't be
20562 modified. Create a copy, use it and return it as target. */
20563 target = gen_reg_rtx (tmode);
20564 emit_move_insn (target, op0);
20565 ix86_expand_vector_set (true, target, op1, elt);
20570 /* Expand an expression EXP that calls a built-in function,
20571 with result going to TARGET if that's convenient
20572 (and in mode MODE if that's convenient).
20573 SUBTARGET may be used as the target for computing one of EXP's operands.
20574 IGNORE is nonzero if the value is to be ignored. */
20577 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20578 enum machine_mode mode ATTRIBUTE_UNUSED,
20579 int ignore ATTRIBUTE_UNUSED)
20581 const struct builtin_description *d;
20583 enum insn_code icode;
20584 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20585 tree arg0, arg1, arg2, arg3;
20586 rtx op0, op1, op2, op3, pat;
20587 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20588 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20592 case IX86_BUILTIN_EMMS:
20593 emit_insn (gen_mmx_emms ());
20596 case IX86_BUILTIN_SFENCE:
20597 emit_insn (gen_sse_sfence ());
20600 case IX86_BUILTIN_MASKMOVQ:
20601 case IX86_BUILTIN_MASKMOVDQU:
20602 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20603 ? CODE_FOR_mmx_maskmovq
20604 : CODE_FOR_sse2_maskmovdqu);
20605 /* Note the arg order is different from the operand order. */
20606 arg1 = CALL_EXPR_ARG (exp, 0);
20607 arg2 = CALL_EXPR_ARG (exp, 1);
20608 arg0 = CALL_EXPR_ARG (exp, 2);
20609 op0 = expand_normal (arg0);
20610 op1 = expand_normal (arg1);
20611 op2 = expand_normal (arg2);
20612 mode0 = insn_data[icode].operand[0].mode;
20613 mode1 = insn_data[icode].operand[1].mode;
20614 mode2 = insn_data[icode].operand[2].mode;
20616 op0 = force_reg (Pmode, op0);
20617 op0 = gen_rtx_MEM (mode1, op0);
20619 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20620 op0 = copy_to_mode_reg (mode0, op0);
20621 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20622 op1 = copy_to_mode_reg (mode1, op1);
20623 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20624 op2 = copy_to_mode_reg (mode2, op2);
20625 pat = GEN_FCN (icode) (op0, op1, op2);
20631 case IX86_BUILTIN_RSQRTF:
20632 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20634 case IX86_BUILTIN_SQRTSS:
20635 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20636 case IX86_BUILTIN_RSQRTSS:
20637 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20638 case IX86_BUILTIN_RCPSS:
20639 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20641 case IX86_BUILTIN_LOADUPS:
20642 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20644 case IX86_BUILTIN_STOREUPS:
20645 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20647 case IX86_BUILTIN_LOADHPS:
20648 case IX86_BUILTIN_LOADLPS:
20649 case IX86_BUILTIN_LOADHPD:
20650 case IX86_BUILTIN_LOADLPD:
20651 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20652 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20653 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20654 : CODE_FOR_sse2_loadlpd);
20655 arg0 = CALL_EXPR_ARG (exp, 0);
20656 arg1 = CALL_EXPR_ARG (exp, 1);
20657 op0 = expand_normal (arg0);
20658 op1 = expand_normal (arg1);
20659 tmode = insn_data[icode].operand[0].mode;
20660 mode0 = insn_data[icode].operand[1].mode;
20661 mode1 = insn_data[icode].operand[2].mode;
20663 op0 = force_reg (mode0, op0);
20664 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20665 if (optimize || target == 0
20666 || GET_MODE (target) != tmode
20667 || !register_operand (target, tmode))
20668 target = gen_reg_rtx (tmode);
20669 pat = GEN_FCN (icode) (target, op0, op1);
20675 case IX86_BUILTIN_STOREHPS:
20676 case IX86_BUILTIN_STORELPS:
20677 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20678 : CODE_FOR_sse_storelps);
20679 arg0 = CALL_EXPR_ARG (exp, 0);
20680 arg1 = CALL_EXPR_ARG (exp, 1);
20681 op0 = expand_normal (arg0);
20682 op1 = expand_normal (arg1);
20683 mode0 = insn_data[icode].operand[0].mode;
20684 mode1 = insn_data[icode].operand[1].mode;
20686 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20687 op1 = force_reg (mode1, op1);
20689 pat = GEN_FCN (icode) (op0, op1);
20695 case IX86_BUILTIN_MOVNTPS:
20696 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20697 case IX86_BUILTIN_MOVNTQ:
20698 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20700 case IX86_BUILTIN_LDMXCSR:
20701 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20702 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20703 emit_move_insn (target, op0);
20704 emit_insn (gen_sse_ldmxcsr (target));
20707 case IX86_BUILTIN_STMXCSR:
20708 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20709 emit_insn (gen_sse_stmxcsr (target));
20710 return copy_to_mode_reg (SImode, target);
20712 case IX86_BUILTIN_SHUFPS:
20713 case IX86_BUILTIN_SHUFPD:
20714 icode = (fcode == IX86_BUILTIN_SHUFPS
20715 ? CODE_FOR_sse_shufps
20716 : CODE_FOR_sse2_shufpd);
20717 arg0 = CALL_EXPR_ARG (exp, 0);
20718 arg1 = CALL_EXPR_ARG (exp, 1);
20719 arg2 = CALL_EXPR_ARG (exp, 2);
20720 op0 = expand_normal (arg0);
20721 op1 = expand_normal (arg1);
20722 op2 = expand_normal (arg2);
20723 tmode = insn_data[icode].operand[0].mode;
20724 mode0 = insn_data[icode].operand[1].mode;
20725 mode1 = insn_data[icode].operand[2].mode;
20726 mode2 = insn_data[icode].operand[3].mode;
20728 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20729 op0 = copy_to_mode_reg (mode0, op0);
20730 if ((optimize && !register_operand (op1, mode1))
20731 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
20732 op1 = copy_to_mode_reg (mode1, op1);
20733 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20735 /* @@@ better error message */
20736 error ("mask must be an immediate");
20737 return gen_reg_rtx (tmode);
20739 if (optimize || target == 0
20740 || GET_MODE (target) != tmode
20741 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20742 target = gen_reg_rtx (tmode);
20743 pat = GEN_FCN (icode) (target, op0, op1, op2);
20749 case IX86_BUILTIN_PSHUFW:
20750 case IX86_BUILTIN_PSHUFD:
20751 case IX86_BUILTIN_PSHUFHW:
20752 case IX86_BUILTIN_PSHUFLW:
20753 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20754 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20755 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20756 : CODE_FOR_mmx_pshufw);
20757 arg0 = CALL_EXPR_ARG (exp, 0);
20758 arg1 = CALL_EXPR_ARG (exp, 1);
20759 op0 = expand_normal (arg0);
20760 op1 = expand_normal (arg1);
20761 tmode = insn_data[icode].operand[0].mode;
20762 mode1 = insn_data[icode].operand[1].mode;
20763 mode2 = insn_data[icode].operand[2].mode;
20765 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20766 op0 = copy_to_mode_reg (mode1, op0);
20767 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20769 /* @@@ better error message */
20770 error ("mask must be an immediate");
20774 || GET_MODE (target) != tmode
20775 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20776 target = gen_reg_rtx (tmode);
20777 pat = GEN_FCN (icode) (target, op0, op1);
20783 case IX86_BUILTIN_PSLLW128:
20784 case IX86_BUILTIN_PSLLWI128:
20785 icode = CODE_FOR_ashlv8hi3;
20787 case IX86_BUILTIN_PSLLD128:
20788 case IX86_BUILTIN_PSLLDI128:
20789 icode = CODE_FOR_ashlv4si3;
20791 case IX86_BUILTIN_PSLLQ128:
20792 case IX86_BUILTIN_PSLLQI128:
20793 icode = CODE_FOR_ashlv2di3;
20795 case IX86_BUILTIN_PSRAW128:
20796 case IX86_BUILTIN_PSRAWI128:
20797 icode = CODE_FOR_ashrv8hi3;
20799 case IX86_BUILTIN_PSRAD128:
20800 case IX86_BUILTIN_PSRADI128:
20801 icode = CODE_FOR_ashrv4si3;
20803 case IX86_BUILTIN_PSRLW128:
20804 case IX86_BUILTIN_PSRLWI128:
20805 icode = CODE_FOR_lshrv8hi3;
20807 case IX86_BUILTIN_PSRLD128:
20808 case IX86_BUILTIN_PSRLDI128:
20809 icode = CODE_FOR_lshrv4si3;
20811 case IX86_BUILTIN_PSRLQ128:
20812 case IX86_BUILTIN_PSRLQI128:
20813 icode = CODE_FOR_lshrv2di3;
20816 arg0 = CALL_EXPR_ARG (exp, 0);
20817 arg1 = CALL_EXPR_ARG (exp, 1);
20818 op0 = expand_normal (arg0);
20819 op1 = expand_normal (arg1);
20821 tmode = insn_data[icode].operand[0].mode;
20822 mode1 = insn_data[icode].operand[1].mode;
20824 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20825 op0 = copy_to_reg (op0);
20827 if (!CONST_INT_P (op1))
20828 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
20830 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
20831 op1 = copy_to_reg (op1);
20833 target = gen_reg_rtx (tmode);
20834 pat = GEN_FCN (icode) (target, op0, op1);
20840 case IX86_BUILTIN_PSLLDQI128:
20841 case IX86_BUILTIN_PSRLDQI128:
20842 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
20843 : CODE_FOR_sse2_lshrti3);
20844 arg0 = CALL_EXPR_ARG (exp, 0);
20845 arg1 = CALL_EXPR_ARG (exp, 1);
20846 op0 = expand_normal (arg0);
20847 op1 = expand_normal (arg1);
20848 tmode = insn_data[icode].operand[0].mode;
20849 mode1 = insn_data[icode].operand[1].mode;
20850 mode2 = insn_data[icode].operand[2].mode;
20852 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20854 op0 = copy_to_reg (op0);
20855 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20857 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20859 error ("shift must be an immediate");
20862 target = gen_reg_rtx (V2DImode);
20863 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
20870 case IX86_BUILTIN_FEMMS:
20871 emit_insn (gen_mmx_femms ());
20874 case IX86_BUILTIN_PAVGUSB:
20875 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
20877 case IX86_BUILTIN_PF2ID:
20878 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
20880 case IX86_BUILTIN_PFACC:
20881 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
20883 case IX86_BUILTIN_PFADD:
20884 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
20886 case IX86_BUILTIN_PFCMPEQ:
20887 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
20889 case IX86_BUILTIN_PFCMPGE:
20890 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
20892 case IX86_BUILTIN_PFCMPGT:
20893 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
20895 case IX86_BUILTIN_PFMAX:
20896 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
20898 case IX86_BUILTIN_PFMIN:
20899 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
20901 case IX86_BUILTIN_PFMUL:
20902 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
20904 case IX86_BUILTIN_PFRCP:
20905 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
20907 case IX86_BUILTIN_PFRCPIT1:
20908 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
20910 case IX86_BUILTIN_PFRCPIT2:
20911 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
20913 case IX86_BUILTIN_PFRSQIT1:
20914 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
20916 case IX86_BUILTIN_PFRSQRT:
20917 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
20919 case IX86_BUILTIN_PFSUB:
20920 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
20922 case IX86_BUILTIN_PFSUBR:
20923 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
20925 case IX86_BUILTIN_PI2FD:
20926 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
20928 case IX86_BUILTIN_PMULHRW:
20929 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
20931 case IX86_BUILTIN_PF2IW:
20932 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
20934 case IX86_BUILTIN_PFNACC:
20935 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
20937 case IX86_BUILTIN_PFPNACC:
20938 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
20940 case IX86_BUILTIN_PI2FW:
20941 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
20943 case IX86_BUILTIN_PSWAPDSI:
20944 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
20946 case IX86_BUILTIN_PSWAPDSF:
20947 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
20949 case IX86_BUILTIN_SQRTSD:
20950 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
20951 case IX86_BUILTIN_LOADUPD:
20952 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
20953 case IX86_BUILTIN_STOREUPD:
20954 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
20956 case IX86_BUILTIN_MFENCE:
20957 emit_insn (gen_sse2_mfence ());
20959 case IX86_BUILTIN_LFENCE:
20960 emit_insn (gen_sse2_lfence ());
20963 case IX86_BUILTIN_CLFLUSH:
20964 arg0 = CALL_EXPR_ARG (exp, 0);
20965 op0 = expand_normal (arg0);
20966 icode = CODE_FOR_sse2_clflush;
20967 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
20968 op0 = copy_to_mode_reg (Pmode, op0);
20970 emit_insn (gen_sse2_clflush (op0));
20973 case IX86_BUILTIN_MOVNTPD:
20974 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
20975 case IX86_BUILTIN_MOVNTDQ:
20976 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
20977 case IX86_BUILTIN_MOVNTI:
20978 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
20980 case IX86_BUILTIN_LOADDQU:
20981 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
20982 case IX86_BUILTIN_STOREDQU:
20983 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
20985 case IX86_BUILTIN_MONITOR:
20986 arg0 = CALL_EXPR_ARG (exp, 0);
20987 arg1 = CALL_EXPR_ARG (exp, 1);
20988 arg2 = CALL_EXPR_ARG (exp, 2);
20989 op0 = expand_normal (arg0);
20990 op1 = expand_normal (arg1);
20991 op2 = expand_normal (arg2);
20993 op0 = copy_to_mode_reg (Pmode, op0);
20995 op1 = copy_to_mode_reg (SImode, op1);
20997 op2 = copy_to_mode_reg (SImode, op2);
20999 emit_insn (gen_sse3_monitor (op0, op1, op2));
21001 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21004 case IX86_BUILTIN_MWAIT:
21005 arg0 = CALL_EXPR_ARG (exp, 0);
21006 arg1 = CALL_EXPR_ARG (exp, 1);
21007 op0 = expand_normal (arg0);
21008 op1 = expand_normal (arg1);
21010 op0 = copy_to_mode_reg (SImode, op0);
21012 op1 = copy_to_mode_reg (SImode, op1);
21013 emit_insn (gen_sse3_mwait (op0, op1));
21016 case IX86_BUILTIN_LDDQU:
21017 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
21020 case IX86_BUILTIN_PALIGNR:
21021 case IX86_BUILTIN_PALIGNR128:
21022 if (fcode == IX86_BUILTIN_PALIGNR)
21024 icode = CODE_FOR_ssse3_palignrdi;
21029 icode = CODE_FOR_ssse3_palignrti;
21032 arg0 = CALL_EXPR_ARG (exp, 0);
21033 arg1 = CALL_EXPR_ARG (exp, 1);
21034 arg2 = CALL_EXPR_ARG (exp, 2);
21035 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21036 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21037 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21038 tmode = insn_data[icode].operand[0].mode;
21039 mode1 = insn_data[icode].operand[1].mode;
21040 mode2 = insn_data[icode].operand[2].mode;
21041 mode3 = insn_data[icode].operand[3].mode;
21043 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21045 op0 = copy_to_reg (op0);
21046 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21048 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21050 op1 = copy_to_reg (op1);
21051 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21053 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21055 error ("shift must be an immediate");
21058 target = gen_reg_rtx (mode);
21059 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21066 case IX86_BUILTIN_MOVNTDQA:
21067 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21070 case IX86_BUILTIN_MOVNTSD:
21071 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21073 case IX86_BUILTIN_MOVNTSS:
21074 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21076 case IX86_BUILTIN_INSERTQ:
21077 case IX86_BUILTIN_EXTRQ:
21078 icode = (fcode == IX86_BUILTIN_EXTRQ
21079 ? CODE_FOR_sse4a_extrq
21080 : CODE_FOR_sse4a_insertq);
21081 arg0 = CALL_EXPR_ARG (exp, 0);
21082 arg1 = CALL_EXPR_ARG (exp, 1);
21083 op0 = expand_normal (arg0);
21084 op1 = expand_normal (arg1);
21085 tmode = insn_data[icode].operand[0].mode;
21086 mode1 = insn_data[icode].operand[1].mode;
21087 mode2 = insn_data[icode].operand[2].mode;
21088 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21089 op0 = copy_to_mode_reg (mode1, op0);
21090 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21091 op1 = copy_to_mode_reg (mode2, op1);
21092 if (optimize || target == 0
21093 || GET_MODE (target) != tmode
21094 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21095 target = gen_reg_rtx (tmode);
21096 pat = GEN_FCN (icode) (target, op0, op1);
21102 case IX86_BUILTIN_EXTRQI:
21103 icode = CODE_FOR_sse4a_extrqi;
21104 arg0 = CALL_EXPR_ARG (exp, 0);
21105 arg1 = CALL_EXPR_ARG (exp, 1);
21106 arg2 = CALL_EXPR_ARG (exp, 2);
21107 op0 = expand_normal (arg0);
21108 op1 = expand_normal (arg1);
21109 op2 = expand_normal (arg2);
21110 tmode = insn_data[icode].operand[0].mode;
21111 mode1 = insn_data[icode].operand[1].mode;
21112 mode2 = insn_data[icode].operand[2].mode;
21113 mode3 = insn_data[icode].operand[3].mode;
21114 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21115 op0 = copy_to_mode_reg (mode1, op0);
21116 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21118 error ("index mask must be an immediate");
21119 return gen_reg_rtx (tmode);
21121 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21123 error ("length mask must be an immediate");
21124 return gen_reg_rtx (tmode);
21126 if (optimize || target == 0
21127 || GET_MODE (target) != tmode
21128 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21129 target = gen_reg_rtx (tmode);
21130 pat = GEN_FCN (icode) (target, op0, op1, op2);
21136 case IX86_BUILTIN_INSERTQI:
21137 icode = CODE_FOR_sse4a_insertqi;
21138 arg0 = CALL_EXPR_ARG (exp, 0);
21139 arg1 = CALL_EXPR_ARG (exp, 1);
21140 arg2 = CALL_EXPR_ARG (exp, 2);
21141 arg3 = CALL_EXPR_ARG (exp, 3);
21142 op0 = expand_normal (arg0);
21143 op1 = expand_normal (arg1);
21144 op2 = expand_normal (arg2);
21145 op3 = expand_normal (arg3);
21146 tmode = insn_data[icode].operand[0].mode;
21147 mode1 = insn_data[icode].operand[1].mode;
21148 mode2 = insn_data[icode].operand[2].mode;
21149 mode3 = insn_data[icode].operand[3].mode;
21150 mode4 = insn_data[icode].operand[4].mode;
21152 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21153 op0 = copy_to_mode_reg (mode1, op0);
21155 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21156 op1 = copy_to_mode_reg (mode2, op1);
21158 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21160 error ("index mask must be an immediate");
21161 return gen_reg_rtx (tmode);
21163 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21165 error ("length mask must be an immediate");
21166 return gen_reg_rtx (tmode);
21168 if (optimize || target == 0
21169 || GET_MODE (target) != tmode
21170 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21171 target = gen_reg_rtx (tmode);
21172 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21178 case IX86_BUILTIN_VEC_INIT_V2SI:
21179 case IX86_BUILTIN_VEC_INIT_V4HI:
21180 case IX86_BUILTIN_VEC_INIT_V8QI:
21181 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21183 case IX86_BUILTIN_VEC_EXT_V2DF:
21184 case IX86_BUILTIN_VEC_EXT_V2DI:
21185 case IX86_BUILTIN_VEC_EXT_V4SF:
21186 case IX86_BUILTIN_VEC_EXT_V4SI:
21187 case IX86_BUILTIN_VEC_EXT_V8HI:
21188 case IX86_BUILTIN_VEC_EXT_V2SI:
21189 case IX86_BUILTIN_VEC_EXT_V4HI:
21190 case IX86_BUILTIN_VEC_EXT_V16QI:
21191 return ix86_expand_vec_ext_builtin (exp, target);
21193 case IX86_BUILTIN_VEC_SET_V2DI:
21194 case IX86_BUILTIN_VEC_SET_V4SF:
21195 case IX86_BUILTIN_VEC_SET_V4SI:
21196 case IX86_BUILTIN_VEC_SET_V8HI:
21197 case IX86_BUILTIN_VEC_SET_V4HI:
21198 case IX86_BUILTIN_VEC_SET_V16QI:
21199 return ix86_expand_vec_set_builtin (exp);
21201 case IX86_BUILTIN_INFQ:
21203 REAL_VALUE_TYPE inf;
21207 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21209 tmp = validize_mem (force_const_mem (mode, tmp));
21212 target = gen_reg_rtx (mode);
21214 emit_move_insn (target, tmp);
21218 case IX86_BUILTIN_FABSQ:
21219 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21221 case IX86_BUILTIN_COPYSIGNQ:
21222 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21228 for (i = 0, d = bdesc_sse_3arg;
21229 i < ARRAY_SIZE (bdesc_sse_3arg);
21231 if (d->code == fcode)
21232 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21235 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21236 if (d->code == fcode)
21238 /* Compares are treated specially. */
21239 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21240 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21241 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21242 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21243 return ix86_expand_sse_compare (d, exp, target);
21245 return ix86_expand_binop_builtin (d->icode, exp, target);
21248 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21249 if (d->code == fcode)
21250 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21252 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21253 if (d->code == fcode)
21254 return ix86_expand_sse_comi (d, exp, target);
21256 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21257 if (d->code == fcode)
21258 return ix86_expand_sse_ptest (d, exp, target);
21260 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21261 if (d->code == fcode)
21262 return ix86_expand_crc32 (d->icode, exp, target);
21264 for (i = 0, d = bdesc_pcmpestr;
21265 i < ARRAY_SIZE (bdesc_pcmpestr);
21267 if (d->code == fcode)
21268 return ix86_expand_sse_pcmpestr (d, exp, target);
21270 for (i = 0, d = bdesc_pcmpistr;
21271 i < ARRAY_SIZE (bdesc_pcmpistr);
21273 if (d->code == fcode)
21274 return ix86_expand_sse_pcmpistr (d, exp, target);
21276 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21277 if (d->code == fcode)
21278 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21279 (enum multi_arg_type)d->flag,
21282 gcc_unreachable ();
21285 /* Returns a function decl for a vectorized version of the builtin function
21286 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21287 if it is not available. */
21290 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21293 enum machine_mode in_mode, out_mode;
21296 if (TREE_CODE (type_out) != VECTOR_TYPE
21297 || TREE_CODE (type_in) != VECTOR_TYPE)
21300 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21301 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21302 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21303 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21307 case BUILT_IN_SQRT:
21308 if (out_mode == DFmode && out_n == 2
21309 && in_mode == DFmode && in_n == 2)
21310 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21313 case BUILT_IN_SQRTF:
21314 if (out_mode == SFmode && out_n == 4
21315 && in_mode == SFmode && in_n == 4)
21316 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21319 case BUILT_IN_LRINT:
21320 if (out_mode == SImode && out_n == 4
21321 && in_mode == DFmode && in_n == 2)
21322 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21325 case BUILT_IN_LRINTF:
21326 if (out_mode == SImode && out_n == 4
21327 && in_mode == SFmode && in_n == 4)
21328 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21335 /* Dispatch to a handler for a vectorization library. */
21336 if (ix86_veclib_handler)
21337 return (*ix86_veclib_handler)(fn, type_out, type_in);
21342 /* Handler for an ACML-style interface to a library with vectorized
21346 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21348 char name[20] = "__vr.._";
21349 tree fntype, new_fndecl, args;
21352 enum machine_mode el_mode, in_mode;
21355 /* The ACML is 64bits only and suitable for unsafe math only as
21356 it does not correctly support parts of IEEE with the required
21357 precision such as denormals. */
21359 || !flag_unsafe_math_optimizations)
21362 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21363 n = TYPE_VECTOR_SUBPARTS (type_out);
21364 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21365 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21366 if (el_mode != in_mode
21376 case BUILT_IN_LOG2:
21377 case BUILT_IN_LOG10:
21380 if (el_mode != DFmode
21385 case BUILT_IN_SINF:
21386 case BUILT_IN_COSF:
21387 case BUILT_IN_EXPF:
21388 case BUILT_IN_POWF:
21389 case BUILT_IN_LOGF:
21390 case BUILT_IN_LOG2F:
21391 case BUILT_IN_LOG10F:
21394 if (el_mode != SFmode
21403 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21404 sprintf (name + 7, "%s", bname+10);
21407 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21408 args = TREE_CHAIN (args))
21412 fntype = build_function_type_list (type_out, type_in, NULL);
21414 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21416 /* Build a function declaration for the vectorized function. */
21417 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21418 TREE_PUBLIC (new_fndecl) = 1;
21419 DECL_EXTERNAL (new_fndecl) = 1;
21420 DECL_IS_NOVOPS (new_fndecl) = 1;
21421 TREE_READONLY (new_fndecl) = 1;
21427 /* Returns a decl of a function that implements conversion of the
21428 input vector of type TYPE, or NULL_TREE if it is not available. */
21431 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21433 if (TREE_CODE (type) != VECTOR_TYPE)
21439 switch (TYPE_MODE (type))
21442 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21447 case FIX_TRUNC_EXPR:
21448 switch (TYPE_MODE (type))
21451 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21461 /* Returns a code for a target-specific builtin that implements
21462 reciprocal of the function, or NULL_TREE if not available. */
21465 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21466 bool sqrt ATTRIBUTE_UNUSED)
21468 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21469 && flag_finite_math_only && !flag_trapping_math
21470 && flag_unsafe_math_optimizations))
21474 /* Machine dependent builtins. */
21477 /* Vectorized version of sqrt to rsqrt conversion. */
21478 case IX86_BUILTIN_SQRTPS_NR:
21479 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21485 /* Normal builtins. */
21488 /* Sqrt to rsqrt conversion. */
21489 case BUILT_IN_SQRTF:
21490 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21497 /* Store OPERAND to the memory after reload is completed. This means
21498 that we can't easily use assign_stack_local. */
21500 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21504 gcc_assert (reload_completed);
21505 if (TARGET_RED_ZONE)
21507 result = gen_rtx_MEM (mode,
21508 gen_rtx_PLUS (Pmode,
21510 GEN_INT (-RED_ZONE_SIZE)));
21511 emit_move_insn (result, operand);
21513 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21519 operand = gen_lowpart (DImode, operand);
21523 gen_rtx_SET (VOIDmode,
21524 gen_rtx_MEM (DImode,
21525 gen_rtx_PRE_DEC (DImode,
21526 stack_pointer_rtx)),
21530 gcc_unreachable ();
21532 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21541 split_di (&operand, 1, operands, operands + 1);
21543 gen_rtx_SET (VOIDmode,
21544 gen_rtx_MEM (SImode,
21545 gen_rtx_PRE_DEC (Pmode,
21546 stack_pointer_rtx)),
21549 gen_rtx_SET (VOIDmode,
21550 gen_rtx_MEM (SImode,
21551 gen_rtx_PRE_DEC (Pmode,
21552 stack_pointer_rtx)),
21557 /* Store HImodes as SImodes. */
21558 operand = gen_lowpart (SImode, operand);
21562 gen_rtx_SET (VOIDmode,
21563 gen_rtx_MEM (GET_MODE (operand),
21564 gen_rtx_PRE_DEC (SImode,
21565 stack_pointer_rtx)),
21569 gcc_unreachable ();
21571 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21576 /* Free operand from the memory. */
21578 ix86_free_from_memory (enum machine_mode mode)
21580 if (!TARGET_RED_ZONE)
21584 if (mode == DImode || TARGET_64BIT)
21588 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21589 to pop or add instruction if registers are available. */
21590 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21591 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21596 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21597 QImode must go into class Q_REGS.
21598 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21599 movdf to do mem-to-mem moves through integer regs. */
21601 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21603 enum machine_mode mode = GET_MODE (x);
21605 /* We're only allowed to return a subclass of CLASS. Many of the
21606 following checks fail for NO_REGS, so eliminate that early. */
21607 if (regclass == NO_REGS)
21610 /* All classes can load zeros. */
21611 if (x == CONST0_RTX (mode))
21614 /* Force constants into memory if we are loading a (nonzero) constant into
21615 an MMX or SSE register. This is because there are no MMX/SSE instructions
21616 to load from a constant. */
21618 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21621 /* Prefer SSE regs only, if we can use them for math. */
21622 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21623 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21625 /* Floating-point constants need more complex checks. */
21626 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21628 /* General regs can load everything. */
21629 if (reg_class_subset_p (regclass, GENERAL_REGS))
21632 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21633 zero above. We only want to wind up preferring 80387 registers if
21634 we plan on doing computation with them. */
21636 && standard_80387_constant_p (x))
21638 /* Limit class to non-sse. */
21639 if (regclass == FLOAT_SSE_REGS)
21641 if (regclass == FP_TOP_SSE_REGS)
21643 if (regclass == FP_SECOND_SSE_REGS)
21644 return FP_SECOND_REG;
21645 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21652 /* Generally when we see PLUS here, it's the function invariant
21653 (plus soft-fp const_int). Which can only be computed into general
21655 if (GET_CODE (x) == PLUS)
21656 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21658 /* QImode constants are easy to load, but non-constant QImode data
21659 must go into Q_REGS. */
21660 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21662 if (reg_class_subset_p (regclass, Q_REGS))
21664 if (reg_class_subset_p (Q_REGS, regclass))
21672 /* Discourage putting floating-point values in SSE registers unless
21673 SSE math is being used, and likewise for the 387 registers. */
21675 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21677 enum machine_mode mode = GET_MODE (x);
21679 /* Restrict the output reload class to the register bank that we are doing
21680 math on. If we would like not to return a subset of CLASS, reject this
21681 alternative: if reload cannot do this, it will still use its choice. */
21682 mode = GET_MODE (x);
21683 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21684 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
21686 if (X87_FLOAT_MODE_P (mode))
21688 if (regclass == FP_TOP_SSE_REGS)
21690 else if (regclass == FP_SECOND_SSE_REGS)
21691 return FP_SECOND_REG;
21693 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21699 /* If we are copying between general and FP registers, we need a memory
21700 location. The same is true for SSE and MMX registers.
21702 To optimize register_move_cost performance, allow inline variant.
21704 The macro can't work reliably when one of the CLASSES is class containing
21705 registers from multiple units (SSE, MMX, integer). We avoid this by never
21706 combining those units in single alternative in the machine description.
21707 Ensure that this constraint holds to avoid unexpected surprises.
21709 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21710 enforce these sanity checks. */
21713 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21714 enum machine_mode mode, int strict)
21716 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21717 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21718 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21719 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21720 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21721 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
21723 gcc_assert (!strict);
21727 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21730 /* ??? This is a lie. We do have moves between mmx/general, and for
21731 mmx/sse2. But by saying we need secondary memory we discourage the
21732 register allocator from using the mmx registers unless needed. */
21733 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21736 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21738 /* SSE1 doesn't have any direct moves from other classes. */
21742 /* If the target says that inter-unit moves are more expensive
21743 than moving through memory, then don't generate them. */
21744 if (!TARGET_INTER_UNIT_MOVES)
21747 /* Between SSE and general, we have moves no larger than word size. */
21748 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21756 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21757 enum machine_mode mode, int strict)
21759 return inline_secondary_memory_needed (class1, class2, mode, strict);
21762 /* Return true if the registers in CLASS cannot represent the change from
21763 modes FROM to TO. */
21766 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
21767 enum reg_class regclass)
21772 /* x87 registers can't do subreg at all, as all values are reformatted
21773 to extended precision. */
21774 if (MAYBE_FLOAT_CLASS_P (regclass))
21777 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21779 /* Vector registers do not support QI or HImode loads. If we don't
21780 disallow a change to these modes, reload will assume it's ok to
21781 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21782 the vec_dupv4hi pattern. */
21783 if (GET_MODE_SIZE (from) < 4)
21786 /* Vector registers do not support subreg with nonzero offsets, which
21787 are otherwise valid for integer registers. Since we can't see
21788 whether we have a nonzero offset from here, prohibit all
21789 nonparadoxical subregs changing size. */
21790 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
21797 /* Return the cost of moving data of mode M between a
21798 register and memory. A value of 2 is the default; this cost is
21799 relative to those in `REGISTER_MOVE_COST'.
21801 This function is used extensively by register_move_cost that is used to
21802 build tables at startup. Make it inline in this case.
21803 When IN is 2, return maximum of in and out move cost.
21805 If moving between registers and memory is more expensive than
21806 between two registers, you should define this macro to express the
21809 Model also increased moving costs of QImode registers in non
21813 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
21817 if (FLOAT_CLASS_P (regclass))
21835 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
21836 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
21838 if (SSE_CLASS_P (regclass))
21841 switch (GET_MODE_SIZE (mode))
21856 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
21857 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
21859 if (MMX_CLASS_P (regclass))
21862 switch (GET_MODE_SIZE (mode))
21874 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
21875 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
21877 switch (GET_MODE_SIZE (mode))
21880 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21883 return ix86_cost->int_store[0];
21884 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
21885 cost = ix86_cost->movzbl_load;
21887 cost = ix86_cost->int_load[0];
21889 return MAX (cost, ix86_cost->int_store[0]);
21895 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
21897 return ix86_cost->movzbl_load;
21899 return ix86_cost->int_store[0] + 4;
21904 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
21905 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
21907 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21908 if (mode == TFmode)
21911 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
21913 cost = ix86_cost->int_load[2];
21915 cost = ix86_cost->int_store[2];
21916 return (cost * (((int) GET_MODE_SIZE (mode)
21917 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
21922 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
21924 return inline_memory_move_cost (mode, regclass, in);
21928 /* Return the cost of moving data from a register in class CLASS1 to
21929 one in class CLASS2.
21931 It is not required that the cost always equal 2 when FROM is the same as TO;
21932 on some machines it is expensive to move between registers if they are not
21933 general registers. */
21936 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
21937 enum reg_class class2)
21939 /* In case we require secondary memory, compute cost of the store followed
21940 by load. In order to avoid bad register allocation choices, we need
21941 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21943 if (inline_secondary_memory_needed (class1, class2, mode, 0))
21947 cost += inline_memory_move_cost (mode, class1, 2);
21948 cost += inline_memory_move_cost (mode, class2, 2);
21950 /* In case of copying from general_purpose_register we may emit multiple
21951 stores followed by single load causing memory size mismatch stall.
21952 Count this as arbitrarily high cost of 20. */
21953 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
21956 /* In the case of FP/MMX moves, the registers actually overlap, and we
21957 have to switch modes in order to treat them differently. */
21958 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21959 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21965 /* Moves between SSE/MMX and integer unit are expensive. */
21966 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
21967 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21969 /* ??? By keeping returned value relatively high, we limit the number
21970 of moves between integer and MMX/SSE registers for all targets.
21971 Additionally, high value prevents problem with x86_modes_tieable_p(),
21972 where integer modes in MMX/SSE registers are not tieable
21973 because of missing QImode and HImode moves to, from or between
21974 MMX/SSE registers. */
21975 return MAX (ix86_cost->mmxsse_to_integer, 8);
21977 if (MAYBE_FLOAT_CLASS_P (class1))
21978 return ix86_cost->fp_move;
21979 if (MAYBE_SSE_CLASS_P (class1))
21980 return ix86_cost->sse_move;
21981 if (MAYBE_MMX_CLASS_P (class1))
21982 return ix86_cost->mmx_move;
21986 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
21989 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
21991 /* Flags and only flags can only hold CCmode values. */
21992 if (CC_REGNO_P (regno))
21993 return GET_MODE_CLASS (mode) == MODE_CC;
21994 if (GET_MODE_CLASS (mode) == MODE_CC
21995 || GET_MODE_CLASS (mode) == MODE_RANDOM
21996 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21998 if (FP_REGNO_P (regno))
21999 return VALID_FP_MODE_P (mode);
22000 if (SSE_REGNO_P (regno))
22002 /* We implement the move patterns for all vector modes into and
22003 out of SSE registers, even when no operation instructions
22005 return (VALID_SSE_REG_MODE (mode)
22006 || VALID_SSE2_REG_MODE (mode)
22007 || VALID_MMX_REG_MODE (mode)
22008 || VALID_MMX_REG_MODE_3DNOW (mode));
22010 if (MMX_REGNO_P (regno))
22012 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22013 so if the register is available at all, then we can move data of
22014 the given mode into or out of it. */
22015 return (VALID_MMX_REG_MODE (mode)
22016 || VALID_MMX_REG_MODE_3DNOW (mode));
22019 if (mode == QImode)
22021 /* Take care for QImode values - they can be in non-QI regs,
22022 but then they do cause partial register stalls. */
22023 if (regno < 4 || TARGET_64BIT)
22025 if (!TARGET_PARTIAL_REG_STALL)
22027 return reload_in_progress || reload_completed;
22029 /* We handle both integer and floats in the general purpose registers. */
22030 else if (VALID_INT_MODE_P (mode))
22032 else if (VALID_FP_MODE_P (mode))
22034 else if (VALID_DFP_MODE_P (mode))
22036 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22037 on to use that value in smaller contexts, this can easily force a
22038 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22039 supporting DImode, allow it. */
22040 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22046 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22047 tieable integer mode. */
22050 ix86_tieable_integer_mode_p (enum machine_mode mode)
22059 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22062 return TARGET_64BIT;
22069 /* Return true if MODE1 is accessible in a register that can hold MODE2
22070 without copying. That is, all register classes that can hold MODE2
22071 can also hold MODE1. */
22074 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22076 if (mode1 == mode2)
22079 if (ix86_tieable_integer_mode_p (mode1)
22080 && ix86_tieable_integer_mode_p (mode2))
22083 /* MODE2 being XFmode implies fp stack or general regs, which means we
22084 can tie any smaller floating point modes to it. Note that we do not
22085 tie this with TFmode. */
22086 if (mode2 == XFmode)
22087 return mode1 == SFmode || mode1 == DFmode;
22089 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22090 that we can tie it with SFmode. */
22091 if (mode2 == DFmode)
22092 return mode1 == SFmode;
22094 /* If MODE2 is only appropriate for an SSE register, then tie with
22095 any other mode acceptable to SSE registers. */
22096 if (GET_MODE_SIZE (mode2) == 16
22097 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22098 return (GET_MODE_SIZE (mode1) == 16
22099 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22101 /* If MODE2 is appropriate for an MMX register, then tie
22102 with any other mode acceptable to MMX registers. */
22103 if (GET_MODE_SIZE (mode2) == 8
22104 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22105 return (GET_MODE_SIZE (mode1) == 8
22106 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22111 /* Compute a (partial) cost for rtx X. Return true if the complete
22112 cost has been computed, and false if subexpressions should be
22113 scanned. In either case, *TOTAL contains the cost result. */
22116 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22118 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22119 enum machine_mode mode = GET_MODE (x);
22127 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22129 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22131 else if (flag_pic && SYMBOLIC_CONST (x)
22133 || (!GET_CODE (x) != LABEL_REF
22134 && (GET_CODE (x) != SYMBOL_REF
22135 || !SYMBOL_REF_LOCAL_P (x)))))
22142 if (mode == VOIDmode)
22145 switch (standard_80387_constant_p (x))
22150 default: /* Other constants */
22155 /* Start with (MEM (SYMBOL_REF)), since that's where
22156 it'll probably end up. Add a penalty for size. */
22157 *total = (COSTS_N_INSNS (1)
22158 + (flag_pic != 0 && !TARGET_64BIT)
22159 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22165 /* The zero extensions is often completely free on x86_64, so make
22166 it as cheap as possible. */
22167 if (TARGET_64BIT && mode == DImode
22168 && GET_MODE (XEXP (x, 0)) == SImode)
22170 else if (TARGET_ZERO_EXTEND_WITH_AND)
22171 *total = ix86_cost->add;
22173 *total = ix86_cost->movzx;
22177 *total = ix86_cost->movsx;
22181 if (CONST_INT_P (XEXP (x, 1))
22182 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22184 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22187 *total = ix86_cost->add;
22190 if ((value == 2 || value == 3)
22191 && ix86_cost->lea <= ix86_cost->shift_const)
22193 *total = ix86_cost->lea;
22203 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22205 if (CONST_INT_P (XEXP (x, 1)))
22207 if (INTVAL (XEXP (x, 1)) > 32)
22208 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22210 *total = ix86_cost->shift_const * 2;
22214 if (GET_CODE (XEXP (x, 1)) == AND)
22215 *total = ix86_cost->shift_var * 2;
22217 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22222 if (CONST_INT_P (XEXP (x, 1)))
22223 *total = ix86_cost->shift_const;
22225 *total = ix86_cost->shift_var;
22230 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22232 /* ??? SSE scalar cost should be used here. */
22233 *total = ix86_cost->fmul;
22236 else if (X87_FLOAT_MODE_P (mode))
22238 *total = ix86_cost->fmul;
22241 else if (FLOAT_MODE_P (mode))
22243 /* ??? SSE vector cost should be used here. */
22244 *total = ix86_cost->fmul;
22249 rtx op0 = XEXP (x, 0);
22250 rtx op1 = XEXP (x, 1);
22252 if (CONST_INT_P (XEXP (x, 1)))
22254 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22255 for (nbits = 0; value != 0; value &= value - 1)
22259 /* This is arbitrary. */
22262 /* Compute costs correctly for widening multiplication. */
22263 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22264 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22265 == GET_MODE_SIZE (mode))
22267 int is_mulwiden = 0;
22268 enum machine_mode inner_mode = GET_MODE (op0);
22270 if (GET_CODE (op0) == GET_CODE (op1))
22271 is_mulwiden = 1, op1 = XEXP (op1, 0);
22272 else if (CONST_INT_P (op1))
22274 if (GET_CODE (op0) == SIGN_EXTEND)
22275 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22278 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22282 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22285 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22286 + nbits * ix86_cost->mult_bit
22287 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22296 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22297 /* ??? SSE cost should be used here. */
22298 *total = ix86_cost->fdiv;
22299 else if (X87_FLOAT_MODE_P (mode))
22300 *total = ix86_cost->fdiv;
22301 else if (FLOAT_MODE_P (mode))
22302 /* ??? SSE vector cost should be used here. */
22303 *total = ix86_cost->fdiv;
22305 *total = ix86_cost->divide[MODE_INDEX (mode)];
22309 if (GET_MODE_CLASS (mode) == MODE_INT
22310 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22312 if (GET_CODE (XEXP (x, 0)) == PLUS
22313 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22314 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22315 && CONSTANT_P (XEXP (x, 1)))
22317 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22318 if (val == 2 || val == 4 || val == 8)
22320 *total = ix86_cost->lea;
22321 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22322 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22324 *total += rtx_cost (XEXP (x, 1), outer_code);
22328 else if (GET_CODE (XEXP (x, 0)) == MULT
22329 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22331 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22332 if (val == 2 || val == 4 || val == 8)
22334 *total = ix86_cost->lea;
22335 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22336 *total += rtx_cost (XEXP (x, 1), outer_code);
22340 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22342 *total = ix86_cost->lea;
22343 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22344 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22345 *total += rtx_cost (XEXP (x, 1), outer_code);
22352 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22354 /* ??? SSE cost should be used here. */
22355 *total = ix86_cost->fadd;
22358 else if (X87_FLOAT_MODE_P (mode))
22360 *total = ix86_cost->fadd;
22363 else if (FLOAT_MODE_P (mode))
22365 /* ??? SSE vector cost should be used here. */
22366 *total = ix86_cost->fadd;
22374 if (!TARGET_64BIT && mode == DImode)
22376 *total = (ix86_cost->add * 2
22377 + (rtx_cost (XEXP (x, 0), outer_code)
22378 << (GET_MODE (XEXP (x, 0)) != DImode))
22379 + (rtx_cost (XEXP (x, 1), outer_code)
22380 << (GET_MODE (XEXP (x, 1)) != DImode)));
22386 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22388 /* ??? SSE cost should be used here. */
22389 *total = ix86_cost->fchs;
22392 else if (X87_FLOAT_MODE_P (mode))
22394 *total = ix86_cost->fchs;
22397 else if (FLOAT_MODE_P (mode))
22399 /* ??? SSE vector cost should be used here. */
22400 *total = ix86_cost->fchs;
22406 if (!TARGET_64BIT && mode == DImode)
22407 *total = ix86_cost->add * 2;
22409 *total = ix86_cost->add;
22413 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22414 && XEXP (XEXP (x, 0), 1) == const1_rtx
22415 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22416 && XEXP (x, 1) == const0_rtx)
22418 /* This kind of construct is implemented using test[bwl].
22419 Treat it as if we had an AND. */
22420 *total = (ix86_cost->add
22421 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22422 + rtx_cost (const1_rtx, outer_code));
22428 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22433 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22434 /* ??? SSE cost should be used here. */
22435 *total = ix86_cost->fabs;
22436 else if (X87_FLOAT_MODE_P (mode))
22437 *total = ix86_cost->fabs;
22438 else if (FLOAT_MODE_P (mode))
22439 /* ??? SSE vector cost should be used here. */
22440 *total = ix86_cost->fabs;
22444 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22445 /* ??? SSE cost should be used here. */
22446 *total = ix86_cost->fsqrt;
22447 else if (X87_FLOAT_MODE_P (mode))
22448 *total = ix86_cost->fsqrt;
22449 else if (FLOAT_MODE_P (mode))
22450 /* ??? SSE vector cost should be used here. */
22451 *total = ix86_cost->fsqrt;
22455 if (XINT (x, 1) == UNSPEC_TP)
22466 static int current_machopic_label_num;
22468 /* Given a symbol name and its associated stub, write out the
22469 definition of the stub. */
22472 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22474 unsigned int length;
22475 char *binder_name, *symbol_name, lazy_ptr_name[32];
22476 int label = ++current_machopic_label_num;
22478 /* For 64-bit we shouldn't get here. */
22479 gcc_assert (!TARGET_64BIT);
22481 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22482 symb = (*targetm.strip_name_encoding) (symb);
22484 length = strlen (stub);
22485 binder_name = alloca (length + 32);
22486 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22488 length = strlen (symb);
22489 symbol_name = alloca (length + 32);
22490 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22492 sprintf (lazy_ptr_name, "L%d$lz", label);
22495 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22497 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22499 fprintf (file, "%s:\n", stub);
22500 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22504 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22505 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22506 fprintf (file, "\tjmp\t*%%edx\n");
22509 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22511 fprintf (file, "%s:\n", binder_name);
22515 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22516 fprintf (file, "\tpushl\t%%eax\n");
22519 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22521 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22523 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22524 fprintf (file, "%s:\n", lazy_ptr_name);
22525 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22526 fprintf (file, "\t.long %s\n", binder_name);
22530 darwin_x86_file_end (void)
22532 darwin_file_end ();
22535 #endif /* TARGET_MACHO */
22537 /* Order the registers for register allocator. */
22540 x86_order_regs_for_local_alloc (void)
22545 /* First allocate the local general purpose registers. */
22546 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22547 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22548 reg_alloc_order [pos++] = i;
22550 /* Global general purpose registers. */
22551 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22552 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22553 reg_alloc_order [pos++] = i;
22555 /* x87 registers come first in case we are doing FP math
22557 if (!TARGET_SSE_MATH)
22558 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22559 reg_alloc_order [pos++] = i;
22561 /* SSE registers. */
22562 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22563 reg_alloc_order [pos++] = i;
22564 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22565 reg_alloc_order [pos++] = i;
22567 /* x87 registers. */
22568 if (TARGET_SSE_MATH)
22569 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22570 reg_alloc_order [pos++] = i;
22572 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22573 reg_alloc_order [pos++] = i;
22575 /* Initialize the rest of array as we do not allocate some registers
22577 while (pos < FIRST_PSEUDO_REGISTER)
22578 reg_alloc_order [pos++] = 0;
22581 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22582 struct attribute_spec.handler. */
22584 ix86_handle_struct_attribute (tree *node, tree name,
22585 tree args ATTRIBUTE_UNUSED,
22586 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22589 if (DECL_P (*node))
22591 if (TREE_CODE (*node) == TYPE_DECL)
22592 type = &TREE_TYPE (*node);
22597 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22598 || TREE_CODE (*type) == UNION_TYPE)))
22600 warning (OPT_Wattributes, "%qs attribute ignored",
22601 IDENTIFIER_POINTER (name));
22602 *no_add_attrs = true;
22605 else if ((is_attribute_p ("ms_struct", name)
22606 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22607 || ((is_attribute_p ("gcc_struct", name)
22608 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22610 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22611 IDENTIFIER_POINTER (name));
22612 *no_add_attrs = true;
22619 ix86_ms_bitfield_layout_p (const_tree record_type)
22621 return (TARGET_MS_BITFIELD_LAYOUT &&
22622 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22623 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22626 /* Returns an expression indicating where the this parameter is
22627 located on entry to the FUNCTION. */
22630 x86_this_parameter (tree function)
22632 tree type = TREE_TYPE (function);
22633 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22637 const int *parm_regs;
22639 if (TARGET_64BIT_MS_ABI)
22640 parm_regs = x86_64_ms_abi_int_parameter_registers;
22642 parm_regs = x86_64_int_parameter_registers;
22643 return gen_rtx_REG (DImode, parm_regs[aggr]);
22646 if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
22648 int regno = AX_REG;
22649 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22651 return gen_rtx_REG (SImode, regno);
22654 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22657 /* Determine whether x86_output_mi_thunk can succeed. */
22660 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22661 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22662 HOST_WIDE_INT vcall_offset, const_tree function)
22664 /* 64-bit can handle anything. */
22668 /* For 32-bit, everything's fine if we have one free register. */
22669 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22672 /* Need a free register for vcall_offset. */
22676 /* Need a free register for GOT references. */
22677 if (flag_pic && !(*targetm.binds_local_p) (function))
22680 /* Otherwise ok. */
22684 /* Output the assembler code for a thunk function. THUNK_DECL is the
22685 declaration for the thunk function itself, FUNCTION is the decl for
22686 the target function. DELTA is an immediate constant offset to be
22687 added to THIS. If VCALL_OFFSET is nonzero, the word at
22688 *(*this + vcall_offset) should be added to THIS. */
22691 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
22692 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
22693 HOST_WIDE_INT vcall_offset, tree function)
22696 rtx this_param = x86_this_parameter (function);
22699 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22700 pull it in now and let DELTA benefit. */
22701 if (REG_P (this_param))
22702 this_reg = this_param;
22703 else if (vcall_offset)
22705 /* Put the this parameter into %eax. */
22706 xops[0] = this_param;
22707 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
22708 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22711 this_reg = NULL_RTX;
22713 /* Adjust the this parameter by a fixed constant. */
22716 xops[0] = GEN_INT (delta);
22717 xops[1] = this_reg ? this_reg : this_param;
22720 if (!x86_64_general_operand (xops[0], DImode))
22722 tmp = gen_rtx_REG (DImode, R10_REG);
22724 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
22726 xops[1] = this_param;
22728 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22731 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22734 /* Adjust the this parameter by a value stored in the vtable. */
22738 tmp = gen_rtx_REG (DImode, R10_REG);
22741 int tmp_regno = CX_REG;
22742 if (lookup_attribute ("fastcall",
22743 TYPE_ATTRIBUTES (TREE_TYPE (function))))
22744 tmp_regno = AX_REG;
22745 tmp = gen_rtx_REG (SImode, tmp_regno);
22748 xops[0] = gen_rtx_MEM (Pmode, this_reg);
22751 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22753 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22755 /* Adjust the this parameter. */
22756 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
22757 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
22759 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
22760 xops[0] = GEN_INT (vcall_offset);
22762 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22763 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
22765 xops[1] = this_reg;
22767 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22769 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22772 /* If necessary, drop THIS back to its stack slot. */
22773 if (this_reg && this_reg != this_param)
22775 xops[0] = this_reg;
22776 xops[1] = this_param;
22777 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22780 xops[0] = XEXP (DECL_RTL (function), 0);
22783 if (!flag_pic || (*targetm.binds_local_p) (function))
22784 output_asm_insn ("jmp\t%P0", xops);
22785 /* All thunks should be in the same object as their target,
22786 and thus binds_local_p should be true. */
22787 else if (TARGET_64BIT_MS_ABI)
22788 gcc_unreachable ();
22791 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
22792 tmp = gen_rtx_CONST (Pmode, tmp);
22793 tmp = gen_rtx_MEM (QImode, tmp);
22795 output_asm_insn ("jmp\t%A0", xops);
22800 if (!flag_pic || (*targetm.binds_local_p) (function))
22801 output_asm_insn ("jmp\t%P0", xops);
22806 rtx sym_ref = XEXP (DECL_RTL (function), 0);
22807 tmp = (gen_rtx_SYMBOL_REF
22809 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
22810 tmp = gen_rtx_MEM (QImode, tmp);
22812 output_asm_insn ("jmp\t%0", xops);
22815 #endif /* TARGET_MACHO */
22817 tmp = gen_rtx_REG (SImode, CX_REG);
22818 output_set_got (tmp, NULL_RTX);
22821 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
22822 output_asm_insn ("jmp\t{*}%1", xops);
22828 x86_file_start (void)
22830 default_file_start ();
22832 darwin_file_start ();
22834 if (X86_FILE_START_VERSION_DIRECTIVE)
22835 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22836 if (X86_FILE_START_FLTUSED)
22837 fputs ("\t.global\t__fltused\n", asm_out_file);
22838 if (ix86_asm_dialect == ASM_INTEL)
22839 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22843 x86_field_alignment (tree field, int computed)
22845 enum machine_mode mode;
22846 tree type = TREE_TYPE (field);
22848 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22850 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
22851 ? get_inner_array_type (type) : type);
22852 if (mode == DFmode || mode == DCmode
22853 || GET_MODE_CLASS (mode) == MODE_INT
22854 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22855 return MIN (32, computed);
22859 /* Output assembler code to FILE to increment profiler label # LABELNO
22860 for profiling a function entry. */
22862 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22866 #ifndef NO_PROFILE_COUNTERS
22867 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
22870 if (!TARGET_64BIT_MS_ABI && flag_pic)
22871 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
22873 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22877 #ifndef NO_PROFILE_COUNTERS
22878 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22879 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
22881 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
22885 #ifndef NO_PROFILE_COUNTERS
22886 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
22887 PROFILE_COUNT_REGISTER);
22889 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22893 /* We don't have exact information about the insn sizes, but we may assume
22894 quite safely that we are informed about all 1 byte insns and memory
22895 address sizes. This is enough to eliminate unnecessary padding in
22899 min_insn_size (rtx insn)
22903 if (!INSN_P (insn) || !active_insn_p (insn))
22906 /* Discard alignments we've emit and jump instructions. */
22907 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22908 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22911 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
22912 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
22915 /* Important case - calls are always 5 bytes.
22916 It is common to have many calls in the row. */
22918 && symbolic_reference_mentioned_p (PATTERN (insn))
22919 && !SIBLING_CALL_P (insn))
22921 if (get_attr_length (insn) <= 1)
22924 /* For normal instructions we may rely on the sizes of addresses
22925 and the presence of symbol to require 4 bytes of encoding.
22926 This is not the case for jumps where references are PC relative. */
22927 if (!JUMP_P (insn))
22929 l = get_attr_length_address (insn);
22930 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22939 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
22943 ix86_avoid_jump_misspredicts (void)
22945 rtx insn, start = get_insns ();
22946 int nbytes = 0, njumps = 0;
22949 /* Look for all minimal intervals of instructions containing 4 jumps.
22950 The intervals are bounded by START and INSN. NBYTES is the total
22951 size of instructions in the interval including INSN and not including
22952 START. When the NBYTES is smaller than 16 bytes, it is possible
22953 that the end of START and INSN ends up in the same 16byte page.
22955 The smallest offset in the page INSN can start is the case where START
22956 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22957 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
22959 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22962 nbytes += min_insn_size (insn);
22964 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
22965 INSN_UID (insn), min_insn_size (insn));
22967 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22968 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
22976 start = NEXT_INSN (start);
22977 if ((JUMP_P (start)
22978 && GET_CODE (PATTERN (start)) != ADDR_VEC
22979 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
22981 njumps--, isjump = 1;
22984 nbytes -= min_insn_size (start);
22986 gcc_assert (njumps >= 0);
22988 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22989 INSN_UID (start), INSN_UID (insn), nbytes);
22991 if (njumps == 3 && isjump && nbytes < 16)
22993 int padsize = 15 - nbytes + min_insn_size (insn);
22996 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22997 INSN_UID (insn), padsize);
22998 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23003 /* AMD Athlon works faster
23004 when RET is not destination of conditional jump or directly preceded
23005 by other jump instruction. We avoid the penalty by inserting NOP just
23006 before the RET instructions in such cases. */
23008 ix86_pad_returns (void)
23013 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23015 basic_block bb = e->src;
23016 rtx ret = BB_END (bb);
23018 bool replace = false;
23020 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23021 || !maybe_hot_bb_p (bb))
23023 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23024 if (active_insn_p (prev) || LABEL_P (prev))
23026 if (prev && LABEL_P (prev))
23031 FOR_EACH_EDGE (e, ei, bb->preds)
23032 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23033 && !(e->flags & EDGE_FALLTHRU))
23038 prev = prev_active_insn (ret);
23040 && ((JUMP_P (prev) && any_condjump_p (prev))
23043 /* Empty functions get branch mispredict even when the jump destination
23044 is not visible to us. */
23045 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23050 emit_insn_before (gen_return_internal_long (), ret);
23056 /* Implement machine specific optimizations. We implement padding of returns
23057 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23061 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23062 ix86_pad_returns ();
23063 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23064 ix86_avoid_jump_misspredicts ();
23067 /* Return nonzero when QImode register that must be represented via REX prefix
23070 x86_extended_QIreg_mentioned_p (rtx insn)
23073 extract_insn_cached (insn);
23074 for (i = 0; i < recog_data.n_operands; i++)
23075 if (REG_P (recog_data.operand[i])
23076 && REGNO (recog_data.operand[i]) >= 4)
23081 /* Return nonzero when P points to register encoded via REX prefix.
23082 Called via for_each_rtx. */
23084 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23086 unsigned int regno;
23089 regno = REGNO (*p);
23090 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23093 /* Return true when INSN mentions register that must be encoded using REX
23096 x86_extended_reg_mentioned_p (rtx insn)
23098 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23101 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23102 optabs would emit if we didn't have TFmode patterns. */
23105 x86_emit_floatuns (rtx operands[2])
23107 rtx neglab, donelab, i0, i1, f0, in, out;
23108 enum machine_mode mode, inmode;
23110 inmode = GET_MODE (operands[1]);
23111 gcc_assert (inmode == SImode || inmode == DImode);
23114 in = force_reg (inmode, operands[1]);
23115 mode = GET_MODE (out);
23116 neglab = gen_label_rtx ();
23117 donelab = gen_label_rtx ();
23118 f0 = gen_reg_rtx (mode);
23120 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23122 expand_float (out, in, 0);
23124 emit_jump_insn (gen_jump (donelab));
23127 emit_label (neglab);
23129 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23131 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23133 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23135 expand_float (f0, i0, 0);
23137 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23139 emit_label (donelab);
23142 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23143 with all elements equal to VAR. Return true if successful. */
23146 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23147 rtx target, rtx val)
23149 enum machine_mode smode, wsmode, wvmode;
23164 val = force_reg (GET_MODE_INNER (mode), val);
23165 x = gen_rtx_VEC_DUPLICATE (mode, val);
23166 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23172 if (TARGET_SSE || TARGET_3DNOW_A)
23174 val = gen_lowpart (SImode, val);
23175 x = gen_rtx_TRUNCATE (HImode, val);
23176 x = gen_rtx_VEC_DUPLICATE (mode, x);
23177 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23199 /* Extend HImode to SImode using a paradoxical SUBREG. */
23200 tmp1 = gen_reg_rtx (SImode);
23201 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23202 /* Insert the SImode value as low element of V4SImode vector. */
23203 tmp2 = gen_reg_rtx (V4SImode);
23204 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23205 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23206 CONST0_RTX (V4SImode),
23208 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23209 /* Cast the V4SImode vector back to a V8HImode vector. */
23210 tmp1 = gen_reg_rtx (V8HImode);
23211 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23212 /* Duplicate the low short through the whole low SImode word. */
23213 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23214 /* Cast the V8HImode vector back to a V4SImode vector. */
23215 tmp2 = gen_reg_rtx (V4SImode);
23216 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23217 /* Replicate the low element of the V4SImode vector. */
23218 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23219 /* Cast the V2SImode back to V8HImode, and store in target. */
23220 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23231 /* Extend QImode to SImode using a paradoxical SUBREG. */
23232 tmp1 = gen_reg_rtx (SImode);
23233 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23234 /* Insert the SImode value as low element of V4SImode vector. */
23235 tmp2 = gen_reg_rtx (V4SImode);
23236 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23237 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23238 CONST0_RTX (V4SImode),
23240 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23241 /* Cast the V4SImode vector back to a V16QImode vector. */
23242 tmp1 = gen_reg_rtx (V16QImode);
23243 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23244 /* Duplicate the low byte through the whole low SImode word. */
23245 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23246 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23247 /* Cast the V16QImode vector back to a V4SImode vector. */
23248 tmp2 = gen_reg_rtx (V4SImode);
23249 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23250 /* Replicate the low element of the V4SImode vector. */
23251 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23252 /* Cast the V2SImode back to V16QImode, and store in target. */
23253 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23261 /* Replicate the value once into the next wider mode and recurse. */
23262 val = convert_modes (wsmode, smode, val, true);
23263 x = expand_simple_binop (wsmode, ASHIFT, val,
23264 GEN_INT (GET_MODE_BITSIZE (smode)),
23265 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23266 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23268 x = gen_reg_rtx (wvmode);
23269 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23270 gcc_unreachable ();
23271 emit_move_insn (target, gen_lowpart (mode, x));
23279 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23280 whose ONE_VAR element is VAR, and other elements are zero. Return true
23284 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23285 rtx target, rtx var, int one_var)
23287 enum machine_mode vsimode;
23303 var = force_reg (GET_MODE_INNER (mode), var);
23304 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23305 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23310 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23311 new_target = gen_reg_rtx (mode);
23313 new_target = target;
23314 var = force_reg (GET_MODE_INNER (mode), var);
23315 x = gen_rtx_VEC_DUPLICATE (mode, var);
23316 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23317 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23320 /* We need to shuffle the value to the correct position, so
23321 create a new pseudo to store the intermediate result. */
23323 /* With SSE2, we can use the integer shuffle insns. */
23324 if (mode != V4SFmode && TARGET_SSE2)
23326 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23328 GEN_INT (one_var == 1 ? 0 : 1),
23329 GEN_INT (one_var == 2 ? 0 : 1),
23330 GEN_INT (one_var == 3 ? 0 : 1)));
23331 if (target != new_target)
23332 emit_move_insn (target, new_target);
23336 /* Otherwise convert the intermediate result to V4SFmode and
23337 use the SSE1 shuffle instructions. */
23338 if (mode != V4SFmode)
23340 tmp = gen_reg_rtx (V4SFmode);
23341 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23346 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23348 GEN_INT (one_var == 1 ? 0 : 1),
23349 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23350 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23352 if (mode != V4SFmode)
23353 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23354 else if (tmp != target)
23355 emit_move_insn (target, tmp);
23357 else if (target != new_target)
23358 emit_move_insn (target, new_target);
23363 vsimode = V4SImode;
23369 vsimode = V2SImode;
23375 /* Zero extend the variable element to SImode and recurse. */
23376 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23378 x = gen_reg_rtx (vsimode);
23379 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23381 gcc_unreachable ();
23383 emit_move_insn (target, gen_lowpart (mode, x));
23391 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23392 consisting of the values in VALS. It is known that all elements
23393 except ONE_VAR are constants. Return true if successful. */
23396 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23397 rtx target, rtx vals, int one_var)
23399 rtx var = XVECEXP (vals, 0, one_var);
23400 enum machine_mode wmode;
23403 const_vec = copy_rtx (vals);
23404 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23405 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23413 /* For the two element vectors, it's just as easy to use
23414 the general case. */
23430 /* There's no way to set one QImode entry easily. Combine
23431 the variable value with its adjacent constant value, and
23432 promote to an HImode set. */
23433 x = XVECEXP (vals, 0, one_var ^ 1);
23436 var = convert_modes (HImode, QImode, var, true);
23437 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23438 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23439 x = GEN_INT (INTVAL (x) & 0xff);
23443 var = convert_modes (HImode, QImode, var, true);
23444 x = gen_int_mode (INTVAL (x) << 8, HImode);
23446 if (x != const0_rtx)
23447 var = expand_simple_binop (HImode, IOR, var, x, var,
23448 1, OPTAB_LIB_WIDEN);
23450 x = gen_reg_rtx (wmode);
23451 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23452 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23454 emit_move_insn (target, gen_lowpart (mode, x));
23461 emit_move_insn (target, const_vec);
23462 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23466 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23467 all values variable, and none identical. */
23470 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23471 rtx target, rtx vals)
23473 enum machine_mode half_mode = GET_MODE_INNER (mode);
23474 rtx op0 = NULL, op1 = NULL;
23475 bool use_vec_concat = false;
23481 if (!mmx_ok && !TARGET_SSE)
23487 /* For the two element vectors, we always implement VEC_CONCAT. */
23488 op0 = XVECEXP (vals, 0, 0);
23489 op1 = XVECEXP (vals, 0, 1);
23490 use_vec_concat = true;
23494 half_mode = V2SFmode;
23497 half_mode = V2SImode;
23503 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23504 Recurse to load the two halves. */
23506 op0 = gen_reg_rtx (half_mode);
23507 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23508 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23510 op1 = gen_reg_rtx (half_mode);
23511 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23512 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23514 use_vec_concat = true;
23525 gcc_unreachable ();
23528 if (use_vec_concat)
23530 if (!register_operand (op0, half_mode))
23531 op0 = force_reg (half_mode, op0);
23532 if (!register_operand (op1, half_mode))
23533 op1 = force_reg (half_mode, op1);
23535 emit_insn (gen_rtx_SET (VOIDmode, target,
23536 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23540 int i, j, n_elts, n_words, n_elt_per_word;
23541 enum machine_mode inner_mode;
23542 rtx words[4], shift;
23544 inner_mode = GET_MODE_INNER (mode);
23545 n_elts = GET_MODE_NUNITS (mode);
23546 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23547 n_elt_per_word = n_elts / n_words;
23548 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23550 for (i = 0; i < n_words; ++i)
23552 rtx word = NULL_RTX;
23554 for (j = 0; j < n_elt_per_word; ++j)
23556 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23557 elt = convert_modes (word_mode, inner_mode, elt, true);
23563 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23564 word, 1, OPTAB_LIB_WIDEN);
23565 word = expand_simple_binop (word_mode, IOR, word, elt,
23566 word, 1, OPTAB_LIB_WIDEN);
23574 emit_move_insn (target, gen_lowpart (mode, words[0]));
23575 else if (n_words == 2)
23577 rtx tmp = gen_reg_rtx (mode);
23578 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23579 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23580 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23581 emit_move_insn (target, tmp);
23583 else if (n_words == 4)
23585 rtx tmp = gen_reg_rtx (V4SImode);
23586 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23587 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23588 emit_move_insn (target, gen_lowpart (mode, tmp));
23591 gcc_unreachable ();
23595 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23596 instructions unless MMX_OK is true. */
23599 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23601 enum machine_mode mode = GET_MODE (target);
23602 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23603 int n_elts = GET_MODE_NUNITS (mode);
23604 int n_var = 0, one_var = -1;
23605 bool all_same = true, all_const_zero = true;
23609 for (i = 0; i < n_elts; ++i)
23611 x = XVECEXP (vals, 0, i);
23612 if (!CONSTANT_P (x))
23613 n_var++, one_var = i;
23614 else if (x != CONST0_RTX (inner_mode))
23615 all_const_zero = false;
23616 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23620 /* Constants are best loaded from the constant pool. */
23623 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23627 /* If all values are identical, broadcast the value. */
23629 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23630 XVECEXP (vals, 0, 0)))
23633 /* Values where only one field is non-constant are best loaded from
23634 the pool and overwritten via move later. */
23638 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23639 XVECEXP (vals, 0, one_var),
23643 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
23647 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
23651 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23653 enum machine_mode mode = GET_MODE (target);
23654 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23655 bool use_vec_merge = false;
23664 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23665 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23667 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23669 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23670 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23676 use_vec_merge = TARGET_SSE4_1;
23684 /* For the two element vectors, we implement a VEC_CONCAT with
23685 the extraction of the other element. */
23687 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
23688 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
23691 op0 = val, op1 = tmp;
23693 op0 = tmp, op1 = val;
23695 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
23696 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23701 use_vec_merge = TARGET_SSE4_1;
23708 use_vec_merge = true;
23712 /* tmp = target = A B C D */
23713 tmp = copy_to_reg (target);
23714 /* target = A A B B */
23715 emit_insn (gen_sse_unpcklps (target, target, target));
23716 /* target = X A B B */
23717 ix86_expand_vector_set (false, target, val, 0);
23718 /* target = A X C D */
23719 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23720 GEN_INT (1), GEN_INT (0),
23721 GEN_INT (2+4), GEN_INT (3+4)));
23725 /* tmp = target = A B C D */
23726 tmp = copy_to_reg (target);
23727 /* tmp = X B C D */
23728 ix86_expand_vector_set (false, tmp, val, 0);
23729 /* target = A B X D */
23730 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23731 GEN_INT (0), GEN_INT (1),
23732 GEN_INT (0+4), GEN_INT (3+4)));
23736 /* tmp = target = A B C D */
23737 tmp = copy_to_reg (target);
23738 /* tmp = X B C D */
23739 ix86_expand_vector_set (false, tmp, val, 0);
23740 /* target = A B X D */
23741 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23742 GEN_INT (0), GEN_INT (1),
23743 GEN_INT (2+4), GEN_INT (0+4)));
23747 gcc_unreachable ();
23752 use_vec_merge = TARGET_SSE4_1;
23756 /* Element 0 handled by vec_merge below. */
23759 use_vec_merge = true;
23765 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23766 store into element 0, then shuffle them back. */
23770 order[0] = GEN_INT (elt);
23771 order[1] = const1_rtx;
23772 order[2] = const2_rtx;
23773 order[3] = GEN_INT (3);
23774 order[elt] = const0_rtx;
23776 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23777 order[1], order[2], order[3]));
23779 ix86_expand_vector_set (false, target, val, 0);
23781 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23782 order[1], order[2], order[3]));
23786 /* For SSE1, we have to reuse the V4SF code. */
23787 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
23788 gen_lowpart (SFmode, val), elt);
23793 use_vec_merge = TARGET_SSE2;
23796 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23800 use_vec_merge = TARGET_SSE4_1;
23810 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
23811 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
23812 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23816 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23818 emit_move_insn (mem, target);
23820 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23821 emit_move_insn (tmp, val);
23823 emit_move_insn (target, mem);
23828 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
23830 enum machine_mode mode = GET_MODE (vec);
23831 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23832 bool use_vec_extr = false;
23845 use_vec_extr = true;
23849 use_vec_extr = TARGET_SSE4_1;
23861 tmp = gen_reg_rtx (mode);
23862 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
23863 GEN_INT (elt), GEN_INT (elt),
23864 GEN_INT (elt+4), GEN_INT (elt+4)));
23868 tmp = gen_reg_rtx (mode);
23869 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
23873 gcc_unreachable ();
23876 use_vec_extr = true;
23881 use_vec_extr = TARGET_SSE4_1;
23895 tmp = gen_reg_rtx (mode);
23896 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
23897 GEN_INT (elt), GEN_INT (elt),
23898 GEN_INT (elt), GEN_INT (elt)));
23902 tmp = gen_reg_rtx (mode);
23903 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
23907 gcc_unreachable ();
23910 use_vec_extr = true;
23915 /* For SSE1, we have to reuse the V4SF code. */
23916 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
23917 gen_lowpart (V4SFmode, vec), elt);
23923 use_vec_extr = TARGET_SSE2;
23926 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23930 use_vec_extr = TARGET_SSE4_1;
23934 /* ??? Could extract the appropriate HImode element and shift. */
23941 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
23942 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
23944 /* Let the rtl optimizers know about the zero extension performed. */
23945 if (inner_mode == QImode || inner_mode == HImode)
23947 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
23948 target = gen_lowpart (SImode, target);
23951 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23955 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23957 emit_move_insn (mem, vec);
23959 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23960 emit_move_insn (target, tmp);
23964 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
23965 pattern to reduce; DEST is the destination; IN is the input vector. */
23968 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
23970 rtx tmp1, tmp2, tmp3;
23972 tmp1 = gen_reg_rtx (V4SFmode);
23973 tmp2 = gen_reg_rtx (V4SFmode);
23974 tmp3 = gen_reg_rtx (V4SFmode);
23976 emit_insn (gen_sse_movhlps (tmp1, in, in));
23977 emit_insn (fn (tmp2, tmp1, in));
23979 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
23980 GEN_INT (1), GEN_INT (1),
23981 GEN_INT (1+4), GEN_INT (1+4)));
23982 emit_insn (fn (dest, tmp2, tmp3));
23985 /* Target hook for scalar_mode_supported_p. */
23987 ix86_scalar_mode_supported_p (enum machine_mode mode)
23989 if (DECIMAL_FLOAT_MODE_P (mode))
23991 else if (mode == TFmode)
23992 return TARGET_64BIT;
23994 return default_scalar_mode_supported_p (mode);
23997 /* Implements target hook vector_mode_supported_p. */
23999 ix86_vector_mode_supported_p (enum machine_mode mode)
24001 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24003 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24005 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24007 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24012 /* Target hook for c_mode_for_suffix. */
24013 static enum machine_mode
24014 ix86_c_mode_for_suffix (char suffix)
24016 if (TARGET_64BIT && suffix == 'q')
24018 if (TARGET_MMX && suffix == 'w')
24024 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24026 We do this in the new i386 backend to maintain source compatibility
24027 with the old cc0-based compiler. */
24030 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24031 tree inputs ATTRIBUTE_UNUSED,
24034 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24036 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24041 /* Implements target vector targetm.asm.encode_section_info. This
24042 is not used by netware. */
24044 static void ATTRIBUTE_UNUSED
24045 ix86_encode_section_info (tree decl, rtx rtl, int first)
24047 default_encode_section_info (decl, rtl, first);
24049 if (TREE_CODE (decl) == VAR_DECL
24050 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24051 && ix86_in_large_data_p (decl))
24052 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24055 /* Worker function for REVERSE_CONDITION. */
24058 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24060 return (mode != CCFPmode && mode != CCFPUmode
24061 ? reverse_condition (code)
24062 : reverse_condition_maybe_unordered (code));
24065 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24069 output_387_reg_move (rtx insn, rtx *operands)
24071 if (REG_P (operands[0]))
24073 if (REG_P (operands[1])
24074 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24076 if (REGNO (operands[0]) == FIRST_STACK_REG)
24077 return output_387_ffreep (operands, 0);
24078 return "fstp\t%y0";
24080 if (STACK_TOP_P (operands[0]))
24081 return "fld%z1\t%y1";
24084 else if (MEM_P (operands[0]))
24086 gcc_assert (REG_P (operands[1]));
24087 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24088 return "fstp%z0\t%y0";
24091 /* There is no non-popping store to memory for XFmode.
24092 So if we need one, follow the store with a load. */
24093 if (GET_MODE (operands[0]) == XFmode)
24094 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24096 return "fst%z0\t%y0";
24103 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24104 FP status register is set. */
24107 ix86_emit_fp_unordered_jump (rtx label)
24109 rtx reg = gen_reg_rtx (HImode);
24112 emit_insn (gen_x86_fnstsw_1 (reg));
24114 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24116 emit_insn (gen_x86_sahf_1 (reg));
24118 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24119 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24123 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24125 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24126 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24129 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24130 gen_rtx_LABEL_REF (VOIDmode, label),
24132 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24134 emit_jump_insn (temp);
24135 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24138 /* Output code to perform a log1p XFmode calculation. */
24140 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24142 rtx label1 = gen_label_rtx ();
24143 rtx label2 = gen_label_rtx ();
24145 rtx tmp = gen_reg_rtx (XFmode);
24146 rtx tmp2 = gen_reg_rtx (XFmode);
24148 emit_insn (gen_absxf2 (tmp, op1));
24149 emit_insn (gen_cmpxf (tmp,
24150 CONST_DOUBLE_FROM_REAL_VALUE (
24151 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24153 emit_jump_insn (gen_bge (label1));
24155 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24156 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24157 emit_jump (label2);
24159 emit_label (label1);
24160 emit_move_insn (tmp, CONST1_RTX (XFmode));
24161 emit_insn (gen_addxf3 (tmp, op1, tmp));
24162 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24163 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24165 emit_label (label2);
24168 /* Output code to perform a Newton-Rhapson approximation of a single precision
24169 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24171 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24173 rtx x0, x1, e0, e1, two;
24175 x0 = gen_reg_rtx (mode);
24176 e0 = gen_reg_rtx (mode);
24177 e1 = gen_reg_rtx (mode);
24178 x1 = gen_reg_rtx (mode);
24180 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24182 if (VECTOR_MODE_P (mode))
24183 two = ix86_build_const_vector (SFmode, true, two);
24185 two = force_reg (mode, two);
24187 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24189 /* x0 = 1./b estimate */
24190 emit_insn (gen_rtx_SET (VOIDmode, x0,
24191 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24194 emit_insn (gen_rtx_SET (VOIDmode, e0,
24195 gen_rtx_MULT (mode, x0, b)));
24197 emit_insn (gen_rtx_SET (VOIDmode, e1,
24198 gen_rtx_MINUS (mode, two, e0)));
24200 emit_insn (gen_rtx_SET (VOIDmode, x1,
24201 gen_rtx_MULT (mode, x0, e1)));
24203 emit_insn (gen_rtx_SET (VOIDmode, res,
24204 gen_rtx_MULT (mode, a, x1)));
24207 /* Output code to perform a Newton-Rhapson approximation of a
24208 single precision floating point [reciprocal] square root. */
24210 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24213 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
24215 x0 = gen_reg_rtx (mode);
24216 e0 = gen_reg_rtx (mode);
24217 e1 = gen_reg_rtx (mode);
24218 e2 = gen_reg_rtx (mode);
24219 e3 = gen_reg_rtx (mode);
24221 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
24222 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
24224 mask = gen_reg_rtx (mode);
24226 if (VECTOR_MODE_P (mode))
24228 three = ix86_build_const_vector (SFmode, true, three);
24229 half = ix86_build_const_vector (SFmode, true, half);
24232 three = force_reg (mode, three);
24233 half = force_reg (mode, half);
24235 zero = force_reg (mode, CONST0_RTX(mode));
24237 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
24238 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
24240 /* Compare a to zero. */
24241 emit_insn (gen_rtx_SET (VOIDmode, mask,
24242 gen_rtx_NE (mode, a, zero)));
24244 /* x0 = 1./sqrt(a) estimate */
24245 emit_insn (gen_rtx_SET (VOIDmode, x0,
24246 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24248 /* Filter out infinity. */
24249 if (VECTOR_MODE_P (mode))
24250 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
24252 gen_lowpart (V4SFmode, x0),
24253 gen_lowpart (V4SFmode, mask))));
24255 emit_insn (gen_rtx_SET (VOIDmode, x0,
24256 gen_rtx_AND (mode, x0, mask)));
24259 emit_insn (gen_rtx_SET (VOIDmode, e0,
24260 gen_rtx_MULT (mode, x0, a)));
24262 emit_insn (gen_rtx_SET (VOIDmode, e1,
24263 gen_rtx_MULT (mode, e0, x0)));
24265 emit_insn (gen_rtx_SET (VOIDmode, e2,
24266 gen_rtx_MINUS (mode, three, e1)));
24269 emit_insn (gen_rtx_SET (VOIDmode, e3,
24270 gen_rtx_MULT (mode, half, x0)));
24273 emit_insn (gen_rtx_SET (VOIDmode, e3,
24274 gen_rtx_MULT (mode, half, e0)));
24275 /* ret = e2 * e3 */
24276 emit_insn (gen_rtx_SET (VOIDmode, res,
24277 gen_rtx_MULT (mode, e2, e3)));
24280 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24282 static void ATTRIBUTE_UNUSED
24283 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24286 /* With Binutils 2.15, the "@unwind" marker must be specified on
24287 every occurrence of the ".eh_frame" section, not just the first
24290 && strcmp (name, ".eh_frame") == 0)
24292 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24293 flags & SECTION_WRITE ? "aw" : "a");
24296 default_elf_asm_named_section (name, flags, decl);
24299 /* Return the mangling of TYPE if it is an extended fundamental type. */
24301 static const char *
24302 ix86_mangle_type (const_tree type)
24304 type = TYPE_MAIN_VARIANT (type);
24306 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24307 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24310 switch (TYPE_MODE (type))
24313 /* __float128 is "g". */
24316 /* "long double" or __float80 is "e". */
24323 /* For 32-bit code we can save PIC register setup by using
24324 __stack_chk_fail_local hidden function instead of calling
24325 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24326 register, so it is better to call __stack_chk_fail directly. */
24329 ix86_stack_protect_fail (void)
24331 return TARGET_64BIT
24332 ? default_external_stack_protect_fail ()
24333 : default_hidden_stack_protect_fail ();
24336 /* Select a format to encode pointers in exception handling data. CODE
24337 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24338 true if the symbol may be affected by dynamic relocations.
24340 ??? All x86 object file formats are capable of representing this.
24341 After all, the relocation needed is the same as for the call insn.
24342 Whether or not a particular assembler allows us to enter such, I
24343 guess we'll have to see. */
24345 asm_preferred_eh_data_format (int code, int global)
24349 int type = DW_EH_PE_sdata8;
24351 || ix86_cmodel == CM_SMALL_PIC
24352 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24353 type = DW_EH_PE_sdata4;
24354 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24356 if (ix86_cmodel == CM_SMALL
24357 || (ix86_cmodel == CM_MEDIUM && code))
24358 return DW_EH_PE_udata4;
24359 return DW_EH_PE_absptr;
24362 /* Expand copysign from SIGN to the positive value ABS_VALUE
24363 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24366 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24368 enum machine_mode mode = GET_MODE (sign);
24369 rtx sgn = gen_reg_rtx (mode);
24370 if (mask == NULL_RTX)
24372 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24373 if (!VECTOR_MODE_P (mode))
24375 /* We need to generate a scalar mode mask in this case. */
24376 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24377 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24378 mask = gen_reg_rtx (mode);
24379 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24383 mask = gen_rtx_NOT (mode, mask);
24384 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24385 gen_rtx_AND (mode, mask, sign)));
24386 emit_insn (gen_rtx_SET (VOIDmode, result,
24387 gen_rtx_IOR (mode, abs_value, sgn)));
24390 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24391 mask for masking out the sign-bit is stored in *SMASK, if that is
24394 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24396 enum machine_mode mode = GET_MODE (op0);
24399 xa = gen_reg_rtx (mode);
24400 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24401 if (!VECTOR_MODE_P (mode))
24403 /* We need to generate a scalar mode mask in this case. */
24404 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24405 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24406 mask = gen_reg_rtx (mode);
24407 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24409 emit_insn (gen_rtx_SET (VOIDmode, xa,
24410 gen_rtx_AND (mode, op0, mask)));
24418 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24419 swapping the operands if SWAP_OPERANDS is true. The expanded
24420 code is a forward jump to a newly created label in case the
24421 comparison is true. The generated label rtx is returned. */
24423 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24424 bool swap_operands)
24435 label = gen_label_rtx ();
24436 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24437 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24438 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24439 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24440 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24441 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24442 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24443 JUMP_LABEL (tmp) = label;
24448 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24449 using comparison code CODE. Operands are swapped for the comparison if
24450 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24452 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24453 bool swap_operands)
24455 enum machine_mode mode = GET_MODE (op0);
24456 rtx mask = gen_reg_rtx (mode);
24465 if (mode == DFmode)
24466 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24467 gen_rtx_fmt_ee (code, mode, op0, op1)));
24469 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24470 gen_rtx_fmt_ee (code, mode, op0, op1)));
24475 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24476 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24478 ix86_gen_TWO52 (enum machine_mode mode)
24480 REAL_VALUE_TYPE TWO52r;
24483 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24484 TWO52 = const_double_from_real_value (TWO52r, mode);
24485 TWO52 = force_reg (mode, TWO52);
24490 /* Expand SSE sequence for computing lround from OP1 storing
24493 ix86_expand_lround (rtx op0, rtx op1)
24495 /* C code for the stuff we're doing below:
24496 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24499 enum machine_mode mode = GET_MODE (op1);
24500 const struct real_format *fmt;
24501 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24504 /* load nextafter (0.5, 0.0) */
24505 fmt = REAL_MODE_FORMAT (mode);
24506 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24507 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24509 /* adj = copysign (0.5, op1) */
24510 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24511 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24513 /* adj = op1 + adj */
24514 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24516 /* op0 = (imode)adj */
24517 expand_fix (op0, adj, 0);
24520 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24523 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24525 /* C code for the stuff we're doing below (for do_floor):
24527 xi -= (double)xi > op1 ? 1 : 0;
24530 enum machine_mode fmode = GET_MODE (op1);
24531 enum machine_mode imode = GET_MODE (op0);
24532 rtx ireg, freg, label, tmp;
24534 /* reg = (long)op1 */
24535 ireg = gen_reg_rtx (imode);
24536 expand_fix (ireg, op1, 0);
24538 /* freg = (double)reg */
24539 freg = gen_reg_rtx (fmode);
24540 expand_float (freg, ireg, 0);
24542 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24543 label = ix86_expand_sse_compare_and_jump (UNLE,
24544 freg, op1, !do_floor);
24545 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24546 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24547 emit_move_insn (ireg, tmp);
24549 emit_label (label);
24550 LABEL_NUSES (label) = 1;
24552 emit_move_insn (op0, ireg);
24555 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24556 result in OPERAND0. */
24558 ix86_expand_rint (rtx operand0, rtx operand1)
24560 /* C code for the stuff we're doing below:
24561 xa = fabs (operand1);
24562 if (!isless (xa, 2**52))
24564 xa = xa + 2**52 - 2**52;
24565 return copysign (xa, operand1);
24567 enum machine_mode mode = GET_MODE (operand0);
24568 rtx res, xa, label, TWO52, mask;
24570 res = gen_reg_rtx (mode);
24571 emit_move_insn (res, operand1);
24573 /* xa = abs (operand1) */
24574 xa = ix86_expand_sse_fabs (res, &mask);
24576 /* if (!isless (xa, TWO52)) goto label; */
24577 TWO52 = ix86_gen_TWO52 (mode);
24578 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24580 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24581 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24583 ix86_sse_copysign_to_positive (res, xa, res, mask);
24585 emit_label (label);
24586 LABEL_NUSES (label) = 1;
24588 emit_move_insn (operand0, res);
24591 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24594 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24596 /* C code for the stuff we expand below.
24597 double xa = fabs (x), x2;
24598 if (!isless (xa, TWO52))
24600 xa = xa + TWO52 - TWO52;
24601 x2 = copysign (xa, x);
24610 enum machine_mode mode = GET_MODE (operand0);
24611 rtx xa, TWO52, tmp, label, one, res, mask;
24613 TWO52 = ix86_gen_TWO52 (mode);
24615 /* Temporary for holding the result, initialized to the input
24616 operand to ease control flow. */
24617 res = gen_reg_rtx (mode);
24618 emit_move_insn (res, operand1);
24620 /* xa = abs (operand1) */
24621 xa = ix86_expand_sse_fabs (res, &mask);
24623 /* if (!isless (xa, TWO52)) goto label; */
24624 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24626 /* xa = xa + TWO52 - TWO52; */
24627 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24628 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24630 /* xa = copysign (xa, operand1) */
24631 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24633 /* generate 1.0 or -1.0 */
24634 one = force_reg (mode,
24635 const_double_from_real_value (do_floor
24636 ? dconst1 : dconstm1, mode));
24638 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24639 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24640 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24641 gen_rtx_AND (mode, one, tmp)));
24642 /* We always need to subtract here to preserve signed zero. */
24643 tmp = expand_simple_binop (mode, MINUS,
24644 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24645 emit_move_insn (res, tmp);
24647 emit_label (label);
24648 LABEL_NUSES (label) = 1;
24650 emit_move_insn (operand0, res);
24653 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24656 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24658 /* C code for the stuff we expand below.
24659 double xa = fabs (x), x2;
24660 if (!isless (xa, TWO52))
24662 x2 = (double)(long)x;
24669 if (HONOR_SIGNED_ZEROS (mode))
24670 return copysign (x2, x);
24673 enum machine_mode mode = GET_MODE (operand0);
24674 rtx xa, xi, TWO52, tmp, label, one, res, mask;
24676 TWO52 = ix86_gen_TWO52 (mode);
24678 /* Temporary for holding the result, initialized to the input
24679 operand to ease control flow. */
24680 res = gen_reg_rtx (mode);
24681 emit_move_insn (res, operand1);
24683 /* xa = abs (operand1) */
24684 xa = ix86_expand_sse_fabs (res, &mask);
24686 /* if (!isless (xa, TWO52)) goto label; */
24687 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24689 /* xa = (double)(long)x */
24690 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24691 expand_fix (xi, res, 0);
24692 expand_float (xa, xi, 0);
24695 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24697 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24698 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24699 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24700 gen_rtx_AND (mode, one, tmp)));
24701 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
24702 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24703 emit_move_insn (res, tmp);
24705 if (HONOR_SIGNED_ZEROS (mode))
24706 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24708 emit_label (label);
24709 LABEL_NUSES (label) = 1;
24711 emit_move_insn (operand0, res);
24714 /* Expand SSE sequence for computing round from OPERAND1 storing
24715 into OPERAND0. Sequence that works without relying on DImode truncation
24716 via cvttsd2siq that is only available on 64bit targets. */
24718 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
24720 /* C code for the stuff we expand below.
24721 double xa = fabs (x), xa2, x2;
24722 if (!isless (xa, TWO52))
24724 Using the absolute value and copying back sign makes
24725 -0.0 -> -0.0 correct.
24726 xa2 = xa + TWO52 - TWO52;
24731 else if (dxa > 0.5)
24733 x2 = copysign (xa2, x);
24736 enum machine_mode mode = GET_MODE (operand0);
24737 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
24739 TWO52 = ix86_gen_TWO52 (mode);
24741 /* Temporary for holding the result, initialized to the input
24742 operand to ease control flow. */
24743 res = gen_reg_rtx (mode);
24744 emit_move_insn (res, operand1);
24746 /* xa = abs (operand1) */
24747 xa = ix86_expand_sse_fabs (res, &mask);
24749 /* if (!isless (xa, TWO52)) goto label; */
24750 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24752 /* xa2 = xa + TWO52 - TWO52; */
24753 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24754 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
24756 /* dxa = xa2 - xa; */
24757 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
24759 /* generate 0.5, 1.0 and -0.5 */
24760 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
24761 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
24762 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
24766 tmp = gen_reg_rtx (mode);
24767 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24768 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
24769 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24770 gen_rtx_AND (mode, one, tmp)));
24771 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24772 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24773 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
24774 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24775 gen_rtx_AND (mode, one, tmp)));
24776 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24778 /* res = copysign (xa2, operand1) */
24779 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
24781 emit_label (label);
24782 LABEL_NUSES (label) = 1;
24784 emit_move_insn (operand0, res);
24787 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24790 ix86_expand_trunc (rtx operand0, rtx operand1)
24792 /* C code for SSE variant we expand below.
24793 double xa = fabs (x), x2;
24794 if (!isless (xa, TWO52))
24796 x2 = (double)(long)x;
24797 if (HONOR_SIGNED_ZEROS (mode))
24798 return copysign (x2, x);
24801 enum machine_mode mode = GET_MODE (operand0);
24802 rtx xa, xi, TWO52, label, res, mask;
24804 TWO52 = ix86_gen_TWO52 (mode);
24806 /* Temporary for holding the result, initialized to the input
24807 operand to ease control flow. */
24808 res = gen_reg_rtx (mode);
24809 emit_move_insn (res, operand1);
24811 /* xa = abs (operand1) */
24812 xa = ix86_expand_sse_fabs (res, &mask);
24814 /* if (!isless (xa, TWO52)) goto label; */
24815 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24817 /* x = (double)(long)x */
24818 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24819 expand_fix (xi, res, 0);
24820 expand_float (res, xi, 0);
24822 if (HONOR_SIGNED_ZEROS (mode))
24823 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24825 emit_label (label);
24826 LABEL_NUSES (label) = 1;
24828 emit_move_insn (operand0, res);
24831 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24834 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
24836 enum machine_mode mode = GET_MODE (operand0);
24837 rtx xa, mask, TWO52, label, one, res, smask, tmp;
24839 /* C code for SSE variant we expand below.
24840 double xa = fabs (x), x2;
24841 if (!isless (xa, TWO52))
24843 xa2 = xa + TWO52 - TWO52;
24847 x2 = copysign (xa2, x);
24851 TWO52 = ix86_gen_TWO52 (mode);
24853 /* Temporary for holding the result, initialized to the input
24854 operand to ease control flow. */
24855 res = gen_reg_rtx (mode);
24856 emit_move_insn (res, operand1);
24858 /* xa = abs (operand1) */
24859 xa = ix86_expand_sse_fabs (res, &smask);
24861 /* if (!isless (xa, TWO52)) goto label; */
24862 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24864 /* res = xa + TWO52 - TWO52; */
24865 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24866 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
24867 emit_move_insn (res, tmp);
24870 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24872 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
24873 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
24874 emit_insn (gen_rtx_SET (VOIDmode, mask,
24875 gen_rtx_AND (mode, mask, one)));
24876 tmp = expand_simple_binop (mode, MINUS,
24877 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
24878 emit_move_insn (res, tmp);
24880 /* res = copysign (res, operand1) */
24881 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
24883 emit_label (label);
24884 LABEL_NUSES (label) = 1;
24886 emit_move_insn (operand0, res);
24889 /* Expand SSE sequence for computing round from OPERAND1 storing
24892 ix86_expand_round (rtx operand0, rtx operand1)
24894 /* C code for the stuff we're doing below:
24895 double xa = fabs (x);
24896 if (!isless (xa, TWO52))
24898 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24899 return copysign (xa, x);
24901 enum machine_mode mode = GET_MODE (operand0);
24902 rtx res, TWO52, xa, label, xi, half, mask;
24903 const struct real_format *fmt;
24904 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24906 /* Temporary for holding the result, initialized to the input
24907 operand to ease control flow. */
24908 res = gen_reg_rtx (mode);
24909 emit_move_insn (res, operand1);
24911 TWO52 = ix86_gen_TWO52 (mode);
24912 xa = ix86_expand_sse_fabs (res, &mask);
24913 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24915 /* load nextafter (0.5, 0.0) */
24916 fmt = REAL_MODE_FORMAT (mode);
24917 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24918 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24920 /* xa = xa + 0.5 */
24921 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
24922 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
24924 /* xa = (double)(int64_t)xa */
24925 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24926 expand_fix (xi, xa, 0);
24927 expand_float (xa, xi, 0);
24929 /* res = copysign (xa, operand1) */
24930 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
24932 emit_label (label);
24933 LABEL_NUSES (label) = 1;
24935 emit_move_insn (operand0, res);
24939 /* Validate whether a SSE5 instruction is valid or not.
24940 OPERANDS is the array of operands.
24941 NUM is the number of operands.
24942 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
24943 NUM_MEMORY is the maximum number of memory operands to accept. */
24945 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
24951 /* Count the number of memory arguments */
24954 for (i = 0; i < num; i++)
24956 enum machine_mode mode = GET_MODE (operands[i]);
24957 if (register_operand (operands[i], mode))
24960 else if (memory_operand (operands[i], mode))
24962 mem_mask |= (1 << i);
24968 rtx pattern = PATTERN (insn);
24970 /* allow 0 for pcmov */
24971 if (GET_CODE (pattern) != SET
24972 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
24974 || operands[i] != CONST0_RTX (mode))
24979 /* If there were no memory operations, allow the insn */
24983 /* Do not allow the destination register to be a memory operand. */
24984 else if (mem_mask & (1 << 0))
24987 /* If there are too many memory operations, disallow the instruction. While
24988 the hardware only allows 1 memory reference, before register allocation
24989 for some insns, we allow two memory operations sometimes in order to allow
24990 code like the following to be optimized:
24992 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24994 or similar cases that are vectorized into using the fmaddss
24996 else if (mem_count > num_memory)
24999 /* Don't allow more than one memory operation if not optimizing. */
25000 else if (mem_count > 1 && !optimize)
25003 else if (num == 4 && mem_count == 1)
25005 /* formats (destination is the first argument), example fmaddss:
25006 xmm1, xmm1, xmm2, xmm3/mem
25007 xmm1, xmm1, xmm2/mem, xmm3
25008 xmm1, xmm2, xmm3/mem, xmm1
25009 xmm1, xmm2/mem, xmm3, xmm1 */
25011 return ((mem_mask == (1 << 1))
25012 || (mem_mask == (1 << 2))
25013 || (mem_mask == (1 << 3)));
25015 /* format, example pmacsdd:
25016 xmm1, xmm2, xmm3/mem, xmm1 */
25018 return (mem_mask == (1 << 2));
25021 else if (num == 4 && num_memory == 2)
25023 /* If there are two memory operations, we can load one of the memory ops
25024 into the destination register. This is for optimizing the
25025 multiply/add ops, which the combiner has optimized both the multiply
25026 and the add insns to have a memory operation. We have to be careful
25027 that the destination doesn't overlap with the inputs. */
25028 rtx op0 = operands[0];
25030 if (reg_mentioned_p (op0, operands[1])
25031 || reg_mentioned_p (op0, operands[2])
25032 || reg_mentioned_p (op0, operands[3]))
25035 /* formats (destination is the first argument), example fmaddss:
25036 xmm1, xmm1, xmm2, xmm3/mem
25037 xmm1, xmm1, xmm2/mem, xmm3
25038 xmm1, xmm2, xmm3/mem, xmm1
25039 xmm1, xmm2/mem, xmm3, xmm1
25041 For the oc0 case, we will load either operands[1] or operands[3] into
25042 operands[0], so any combination of 2 memory operands is ok. */
25046 /* format, example pmacsdd:
25047 xmm1, xmm2, xmm3/mem, xmm1
25049 For the integer multiply/add instructions be more restrictive and
25050 require operands[2] and operands[3] to be the memory operands. */
25052 return (mem_mask == ((1 << 2) | (1 << 3)));
25055 else if (num == 3 && num_memory == 1)
25057 /* formats, example protb:
25058 xmm1, xmm2, xmm3/mem
25059 xmm1, xmm2/mem, xmm3 */
25061 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25063 /* format, example comeq:
25064 xmm1, xmm2, xmm3/mem */
25066 return (mem_mask == (1 << 2));
25070 gcc_unreachable ();
25076 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25077 hardware will allow by using the destination register to load one of the
25078 memory operations. Presently this is used by the multiply/add routines to
25079 allow 2 memory references. */
25082 ix86_expand_sse5_multiple_memory (rtx operands[],
25084 enum machine_mode mode)
25086 rtx op0 = operands[0];
25088 || memory_operand (op0, mode)
25089 || reg_mentioned_p (op0, operands[1])
25090 || reg_mentioned_p (op0, operands[2])
25091 || reg_mentioned_p (op0, operands[3]))
25092 gcc_unreachable ();
25094 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25095 the destination register. */
25096 if (memory_operand (operands[1], mode))
25098 emit_move_insn (op0, operands[1]);
25101 else if (memory_operand (operands[3], mode))
25103 emit_move_insn (op0, operands[3]);
25107 gcc_unreachable ();
25113 /* Table of valid machine attributes. */
25114 static const struct attribute_spec ix86_attribute_table[] =
25116 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25117 /* Stdcall attribute says callee is responsible for popping arguments
25118 if they are not variable. */
25119 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25120 /* Fastcall attribute says callee is responsible for popping arguments
25121 if they are not variable. */
25122 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25123 /* Cdecl attribute says the callee is a normal C declaration */
25124 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25125 /* Regparm attribute specifies how many integer arguments are to be
25126 passed in registers. */
25127 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25128 /* Sseregparm attribute says we are using x86_64 calling conventions
25129 for FP arguments. */
25130 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25131 /* force_align_arg_pointer says this function realigns the stack at entry. */
25132 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25133 false, true, true, ix86_handle_cconv_attribute },
25134 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25135 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25136 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25137 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25139 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25140 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25141 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25142 SUBTARGET_ATTRIBUTE_TABLE,
25144 { NULL, 0, 0, false, false, false, NULL }
25147 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25149 x86_builtin_vectorization_cost (bool runtime_test)
25151 /* If the branch of the runtime test is taken - i.e. - the vectorized
25152 version is skipped - this incurs a misprediction cost (because the
25153 vectorized version is expected to be the fall-through). So we subtract
25154 the latency of a mispredicted branch from the costs that are incured
25155 when the vectorized version is executed.
25157 TODO: The values in individual target tables have to be tuned or new
25158 fields may be needed. For eg. on K8, the default branch path is the
25159 not-taken path. If the taken path is predicted correctly, the minimum
25160 penalty of going down the taken-path is 1 cycle. If the taken-path is
25161 not predicted correctly, then the minimum penalty is 10 cycles. */
25165 return (-(ix86_cost->cond_taken_branch_cost));
25171 /* Initialize the GCC target structure. */
25172 #undef TARGET_ATTRIBUTE_TABLE
25173 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25174 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25175 # undef TARGET_MERGE_DECL_ATTRIBUTES
25176 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25179 #undef TARGET_COMP_TYPE_ATTRIBUTES
25180 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25182 #undef TARGET_INIT_BUILTINS
25183 #define TARGET_INIT_BUILTINS ix86_init_builtins
25184 #undef TARGET_EXPAND_BUILTIN
25185 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25187 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25188 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25189 ix86_builtin_vectorized_function
25191 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25192 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25194 #undef TARGET_BUILTIN_RECIPROCAL
25195 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25197 #undef TARGET_ASM_FUNCTION_EPILOGUE
25198 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25200 #undef TARGET_ENCODE_SECTION_INFO
25201 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25202 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25204 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25207 #undef TARGET_ASM_OPEN_PAREN
25208 #define TARGET_ASM_OPEN_PAREN ""
25209 #undef TARGET_ASM_CLOSE_PAREN
25210 #define TARGET_ASM_CLOSE_PAREN ""
25212 #undef TARGET_ASM_ALIGNED_HI_OP
25213 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25214 #undef TARGET_ASM_ALIGNED_SI_OP
25215 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25217 #undef TARGET_ASM_ALIGNED_DI_OP
25218 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25221 #undef TARGET_ASM_UNALIGNED_HI_OP
25222 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25223 #undef TARGET_ASM_UNALIGNED_SI_OP
25224 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25225 #undef TARGET_ASM_UNALIGNED_DI_OP
25226 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25228 #undef TARGET_SCHED_ADJUST_COST
25229 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25230 #undef TARGET_SCHED_ISSUE_RATE
25231 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25232 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25233 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25234 ia32_multipass_dfa_lookahead
25236 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25237 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25240 #undef TARGET_HAVE_TLS
25241 #define TARGET_HAVE_TLS true
25243 #undef TARGET_CANNOT_FORCE_CONST_MEM
25244 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25245 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25246 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25248 #undef TARGET_DELEGITIMIZE_ADDRESS
25249 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25251 #undef TARGET_MS_BITFIELD_LAYOUT_P
25252 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25255 #undef TARGET_BINDS_LOCAL_P
25256 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25258 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25259 #undef TARGET_BINDS_LOCAL_P
25260 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25263 #undef TARGET_ASM_OUTPUT_MI_THUNK
25264 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25265 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25266 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25268 #undef TARGET_ASM_FILE_START
25269 #define TARGET_ASM_FILE_START x86_file_start
25271 #undef TARGET_DEFAULT_TARGET_FLAGS
25272 #define TARGET_DEFAULT_TARGET_FLAGS \
25274 | TARGET_SUBTARGET_DEFAULT \
25275 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25277 #undef TARGET_HANDLE_OPTION
25278 #define TARGET_HANDLE_OPTION ix86_handle_option
25280 #undef TARGET_RTX_COSTS
25281 #define TARGET_RTX_COSTS ix86_rtx_costs
25282 #undef TARGET_ADDRESS_COST
25283 #define TARGET_ADDRESS_COST ix86_address_cost
25285 #undef TARGET_FIXED_CONDITION_CODE_REGS
25286 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25287 #undef TARGET_CC_MODES_COMPATIBLE
25288 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25290 #undef TARGET_MACHINE_DEPENDENT_REORG
25291 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25293 #undef TARGET_BUILD_BUILTIN_VA_LIST
25294 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25296 #undef TARGET_EXPAND_BUILTIN_VA_START
25297 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25299 #undef TARGET_MD_ASM_CLOBBERS
25300 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25302 #undef TARGET_PROMOTE_PROTOTYPES
25303 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25304 #undef TARGET_STRUCT_VALUE_RTX
25305 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25306 #undef TARGET_SETUP_INCOMING_VARARGS
25307 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25308 #undef TARGET_MUST_PASS_IN_STACK
25309 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25310 #undef TARGET_PASS_BY_REFERENCE
25311 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25312 #undef TARGET_INTERNAL_ARG_POINTER
25313 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25314 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25315 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25316 #undef TARGET_STRICT_ARGUMENT_NAMING
25317 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25319 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25320 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25322 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25323 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25325 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25326 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25328 #undef TARGET_C_MODE_FOR_SUFFIX
25329 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25332 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25333 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25336 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25337 #undef TARGET_INSERT_ATTRIBUTES
25338 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25341 #undef TARGET_MANGLE_TYPE
25342 #define TARGET_MANGLE_TYPE ix86_mangle_type
25344 #undef TARGET_STACK_PROTECT_FAIL
25345 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25347 #undef TARGET_FUNCTION_VALUE
25348 #define TARGET_FUNCTION_VALUE ix86_function_value
25350 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25351 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25353 struct gcc_target targetm = TARGET_INITIALIZER;
25355 #include "gt-i386.h"