/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "output.h"
#include "basic-block.h"
#include "integrate.h"
#include "toplev.h"
#include "ggc.h"
#include "hashtab.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "reload.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "params.h"
#include "machmode.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#include "spu-builtins.h"
#include "ddg.h"
/* Builtin types, data and prototypes. */
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
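
/* Note: each entry above is the inclusive range accepted for one
   immediate operand field.  The _2 and _4 suffixes mark fields the
   hardware scales by 4 or 16, which is why SPU_BTI_S10_4 spans
   -0x2000..0x1fff (the signed 10-bit range times 16) and SPU_BTI_S16_2
   spans -0x20000..0x1ffff (signed 16-bit times 4).  SPU_BTI_16 accepts
   both signed and unsigned 16-bit values, hence -0x8000..0xffff.  */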

/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs.  */
static void spu_init_builtins (void);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance);
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
				    enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void insert_branch_hints (void);
static void insert_nops (void);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int can_issue_more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_priority (rtx insn, int pri);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum,
					    enum machine_mode mode,
					    const_tree type,
					    unsigned char named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
				      tree * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int reg_align (rtx reg);
static int mem_is_padded_component_ref (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
				    int *total);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
static bool spu_vector_alignment_reachable (const_tree, bool);
static int spu_sms_res_mii (struct ddg *g);

extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;

/* Which instruction set architecture to use.  */
int spu_arch;

/* Which CPU to tune for.  */
int spu_tune;

enum spu_immediate {
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};

enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

static enum machine_mode spu_libgcc_cmp_return_mode (void);

static enum machine_mode spu_libgcc_shift_count_mode (void);

/* Built in types.  */
tree spu_builtin_types[SPU_BTI_MAX];

/* TARGET overrides.  */

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode

/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

const struct attribute_spec spu_attribute_table[];
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

struct gcc_target targetm = TARGET_INITIALIZER;

void
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  /* Override some of the default param values.  With so many registers,
     larger values are better for these params.  */
  MAX_PENDING_LIST_LENGTH = 128;

  /* With so many registers this is better on by default. */
  flag_rename_registers = 1;
}

/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed.  */
void
spu_override_options (void)
{
  /* Small loops will be completely unrolled at -O3.  For SPU it is more
     important to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops
      && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;

  flag_omit_frame_pointer = 1;

  if (align_functions < 8)
    align_functions = 8;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("Unknown architecture '%s'", &spu_arch_string[0]);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("Unknown architecture '%s'", &spu_tune_string[0]);
    }
}
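
/* Illustrative mapping (option spellings assumed): -march=cell selects
   PROCESSOR_CELL and -march=celledp selects PROCESSOR_CELLEDP, the
   variant with enhanced hardware double precision; -mtune= picks the
   same two values through spu_tune_string without changing the ISA.  */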

/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* Table of machine attributes.  */
const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",      0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector", 0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,         0, 0, false, false, false, NULL }
};
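
/* Example use of the attributes above (illustrative only):

     void isr (void) __attribute__ ((naked));
     typedef int vec_t __attribute__ ((spu_vector));

   "naked" requires a FUNCTION_DECL and suppresses prologue/epilogue
   generation (see spu_naked_function_p); "spu_vector" applies to a
   type and marks it as a vector type.  */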

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static unsigned char
spu_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case SFmode:
    case DImode:
    case TImode:
    case DFmode:
      return true;

    default:
      return false;
    }
}

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static unsigned char
spu_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return true;

    default:
      return false;
    }
}

/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
}
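
/* For example, (subreg:SI (reg:QI)) is accepted because both modes fit
   in one 4-byte preferred slot, while (subreg:SI (reg:DI)) is rejected:
   on SPU the SImode value lives in the most significant bytes of the
   DImode register, not the least significant bytes GCC assumes.  */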

/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;
  /* Strip any SUBREG.  */
  if (GET_CODE (op) == SUBREG)
    {
      if (start)
	*start +=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, assure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT src_size, dst_size;
  enum machine_mode src_mode, dst_mode;
  rtx dst = ops[0], src = ops[1];
  rtx s;

  dst = adjust_operand (ops[0], 0);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  src = adjust_operand (src, &start);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  if (start > 0)
    {
      s = gen_reg_rtx (src_mode);
      switch (src_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
	  break;
	default:
	  abort ();
	}
      src = s;
    }

  if (width < src_size)
    {
      rtx pat;
      int icode;
      switch (src_mode)
	{
	case SImode:
	  icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
	  break;
	case DImode:
	  icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
	  break;
	case TImode:
	  icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
	  break;
	default:
	  abort ();
	}
      s = gen_reg_rtx (src_mode);
      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
      if (pat == 0)
	abort ();
      emit_insn (pat);
      src = s;
    }

  convert_move (dst, src, unsignedp);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }

  if (GET_CODE (ops[0]) == MEM)
    {
      rtx aligned = gen_reg_rtx (SImode);
      rtx low = gen_reg_rtx (SImode);
      rtx addr = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx mem;

      emit_move_insn (addr, XEXP (ops[0], 0));
      emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, aligned);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      emit_move_insn (mem, dst);
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = adjust_address (mem, TImode, 16);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
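
/* The MEM case above is an aligned read-modify-write: the quadword
   containing ops[0] is loaded from its 16-byte-aligned base, the source
   and the mask are rotated into position with rotqby/rotqmby, the bytes
   selected by the mask are replaced with selb, and the quadword is
   stored back.  When the field crosses the quadword boundary, the next
   quadword gets the same treatment with the mask shifted left by
   (16 - low) bytes.  */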
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  dst = ops[0];
  src = ops[1];

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (offset < bytes)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
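
/* The tail of a 16-byte-aligned block move is handled the same way as
   the unaligned insert above: a full quadword is loaded from both the
   source and the destination, merged with selb under a mask whose first
   (bytes - offset) bytes are 0xff, and stored back, so no scalar byte
   copies are ever issued.  */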

enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
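
/* spu_comp_icode is indexed first by operand mode (rows in the order
   QI, HI, SI, DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF) and then
   by spu_comp_code (SPU_EQ, SPU_GT, SPU_GTU).  A zero entry means the
   comparison does not exist for that mode; there are no unsigned
   compares for the floating point modes, for example.  */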

/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, so we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  rtx target = operands[0];
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  int index;
  int eq_test = 0;

  /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
  if (GET_CODE (spu_compare_op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
	switch (code)
	  {
	  case GE:
	    spu_compare_op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    spu_compare_op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    spu_compare_op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    spu_compare_op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }

  comp_mode = SImode;
  op_mode = GET_MODE (spu_compare_op0);

  switch (code)
    {
    case GE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 0;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 1;
	  reverse_test = 1;
	}
      break;
    case LE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 1;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 0;
	  reverse_test = 1;
	}
      break;
      /* ... the remaining codes (EQ, NE, GT, GTU, LT, LTU, LEU, GEU)
	 are classified the same way, choosing scode and the reverse
	 flags for each ... */
    }

  switch (op_mode)
    {
    case QImode:
      index = 0;
      comp_mode = QImode;
      break;
    case HImode:
      index = 1;
      comp_mode = HImode;
      break;
    case SImode:
      index = 2;
      break;
    case DImode:
      index = 3;
      break;
    case TImode:
      index = 4;
      break;
    case SFmode:
      index = 5;
      break;
    case DFmode:
      index = 6;
      break;
    case V16QImode:
      index = 7;
      comp_mode = V16QImode;
      break;
    case V8HImode:
      index = 8;
      comp_mode = V8HImode;
      break;
    case V4SImode:
      index = 9;
      comp_mode = V4SImode;
      break;
    case V4SFmode:
      index = 10;
      comp_mode = V4SImode;
      break;
    case V2DFmode:
      index = 11;
      comp_mode = V2DImode;
      break;
    default:
      abort ();
    }

  if (GET_MODE (spu_compare_op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && spu_compare_op1 == const0_rtx
      && (GET_MODE (spu_compare_op0) == SImode
	  || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = spu_compare_op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = spu_compare_op1;
	  spu_compare_op1 = spu_compare_op0;
	  spu_compare_op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (spu_compare_op0, op_mode))
	spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (spu_compare_op1, op_mode))
	spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 spu_compare_op0,
							 spu_compare_op1);
      if (comp_rtx == 0)
	abort ();
      emit_insn (comp_rtx);

      if (eq_test)
	{
	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							     spu_compare_op0,
							     spu_compare_op1);
	  if (eq_rtx == 0)
	    abort ();
	  ior_code = ior_optab->handlers[(int) comp_mode].insn_code;
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));
	}
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
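
/* IS_SET selects what is emitted from the comparison: 0 emits a
   conditional branch to operands[0], 2 emits a conditional move that
   selects between operands[2] and operands[3] with selb, and any other
   value materializes the comparison result itself into operands[0].  */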

static HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

static rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
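
/* Round-trip example: hwint_to_const_double (SFmode, 0x3f800000) builds
   the CONST_DOUBLE for 1.0f, and const_double_to_hwint returns
   0x3f800000 for it, since VAL is simply the IEEE bit image of the
   float.  */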

void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      else
	abort ();
      break;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_INT:
      output_addr_const (file, addr);
      break;

    default:
      abort ();
    }
}
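
/* Sample renderings (assuming reg_names[] entries such as "$sp", "$3"):
     (reg $sp)                     -> "0($sp)"
     (plus (reg $sp) (const 32))   -> "32($sp)"
     (plus (reg $3) (reg $4))      -> "$3,$4"  */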

void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	      break;
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	    case SPU_IOHL:
	      break;
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	gcc_unreachable ();
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		  break;
		case SPU_ILA:
		  fprintf (file, "a");
		  break;
		case SPU_ILH:
		  fprintf (file, "h");
		  break;
		case SPU_ILHU:
		  fprintf (file, "hu");
		  break;
		default:
		  gcc_unreachable ();
		}
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	      if (info == 1)
		fprintf (file, "b");
	      else if (info == 2)
		fprintf (file, "h");
	      else if (info == 4)
		fprintf (file, "w");
	      else if (info == 8)
		fprintf (file, "d");
	      break;
	    case IC_IL1s:
	      if (xcode == CONST_VECTOR)
		{
		  x = CONST_VECTOR_ELT (x, 0);
		  xcode = GET_CODE (x);
		}
	      if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
		fprintf (file, "a");
	      else if (xcode == HIGH)
		fprintf (file, "hu");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		case SPU_ILA:
		  break;
		case SPU_ILH:
		case SPU_ILHU:
		  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	      break;
	    case IC_FSMBI:
	      constant_to_array (mode, x, arr);
	      val = 0;
	      for (i = 0; i < 16; i++)
		{
		  val <<= 1;
		  val |= arr[i] & 1;
		}
	      print_operand (file, GEN_INT (val), 0);
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
	      break;
	    case IC_IL1s:
	      if (GET_CODE (x) == CONST_VECTOR)
		x = CONST_VECTOR_ELT (x, 0);
	      output_addr_const (file, x);
	      if (GET_CODE (x) == HIGH)
		fprintf (file, "@h");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

      /* (case label elided in this excerpt) */
      if (xcode == CONST_INT)
	{
	  /* Only 4 least significant bits are relevant for generate
	     control word instructions.  */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	  return;
	}
      break;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  default:
	    gcc_unreachable ();
	  }
      else
	gcc_unreachable ();
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls. */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

    case 'e':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'f':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'g':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'h':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'E':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'F':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'G':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'H':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 0:
      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}

extern char call_used_regs[];

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  return pic_reg;
}

/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction. */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, hi, lo;
	int i;
	constant_to_array (mode, ops[1], arrhi);
	to = !can_create_pseudo_p () ? ops[0] : gen_reg_rtx (mode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (mode, arrhi);
	lo = array_to_constant (mode, arrlo);
	emit_move_insn (to, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	 * AND might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	      current_function_uses_pic_offset_table = 1;
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}

/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || current_function_uses_pic_offset_table)
      && (!saving
	  || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation.  */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
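
/* Every saved register occupies a full 16-byte quadword slot (0x10),
   because registers are stored and loaded as whole V4SImode quadwords
   in frame_emit_store and frame_emit_load below.  */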

static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}

/* This happens after reload, so we need to expand it.  */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}

/* Return nonzero if this function is known to have a null epilogue.  */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + current_function_outgoing_args_size
	      + current_function_pretend_args_size == 0)
	  && current_function_is_leaf)
	return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:
         +-------------+
         |  incoming   |
         |    args     |
         +-------------+
         |  $lr save   |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  reg save   | current_function_pretend_args_size bytes
         +-------------+
         | saved regs  | spu_saved_regs_size() bytes
         +-------------+
  FP     |    vars     | get_frame_size() bytes
         +-------------+
         |  outgoing   |
         |    args     | current_function_outgoing_args_size bytes
         +-------------+
         | $lr of next |
         |    frame    |
         +-------------+
      SP | back chain  |
         +-------------+
*/
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain.  */
  emit_note (NOTE_INSN_DELETED);

  if (flag_pic && optimize == 0)
    current_function_uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + current_function_outgoing_args_size
    + current_function_pretend_args_size;

  if (!current_function_is_leaf
      || current_function_calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (total_size > 0)
    {
      offset = -current_function_pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 16;
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (flag_pic && current_function_uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1)  */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
	}

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain.  */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first. */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else if (satisfies_constraint_K (GEN_INT (-total_size)))
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      REG_NOTES (insn) =
	gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	}

      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + current_function_outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  REG_NOTES (insn) =
	    gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			       real, REG_NOTES (insn));
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
	}
    }

  emit_note (NOTE_INSN_DELETED);
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain.  */
  emit_note (NOTE_INSN_DELETED);

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + current_function_outgoing_args_size
    + current_function_pretend_args_size;

  if (!current_function_is_leaf
      || current_function_calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (current_function_calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -current_function_pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 0x10;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_insn (gen_rtx_USE
		 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);
    }

  emit_note (NOTE_INSN_DELETED);
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient.  */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}

/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
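
/* For example, spu_const (V4SImode, 1) yields the vector constant
   {1, 1, 1, 1}, and spu_const (SFmode, 0x3f800000) yields 1.0f, since
   for float modes VAL is interpreted as the bit pattern.  */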

/* branch hint stuff */

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint.  (FIXME: We should
   accept less and insert nops to enforce it because hinting is always
   profitable for performance, but we do need to be careful of code
   size.)  */
int spu_hint_dist = (8 * 4);

/* Create a MODE vector constant from 4 ints. */
rtx
spu_const_from_ints (enum machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant (mode, arr);
}
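
/* The four ints are packed big-endian, A into bytes 0-3 through D into
   bytes 12-15, matching the SPU's register byte order.  */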

/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  basic_block bb;		/* the original block. */
};

/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}

static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
{
  rtx branch_label;
  rtx hint, insn, prev, next;

  if (before == 0 || branch == 0 || target == 0)
    return;

  if (distance > 600)
    return;

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);

  /* If the previous insn is pipe0, make the hbr dual issue with it.  If
     the current insn is pipe0, dual issue with it. */
  prev = prev_active_insn (before);
  if (prev && get_pipe (prev) == 0)
    hint = emit_insn_before (gen_hbr (branch_label, target), before);
  else if (get_pipe (before) == 0 && distance > spu_hint_dist)
    {
      next = next_active_insn (before);
      hint = emit_insn_after (gen_hbr (branch_label, target), before);
      if (next)
	PUT_MODE (next, TImode);
    }
  else
    {
      hint = emit_insn_before (gen_hbr (branch_label, target), before);
      PUT_MODE (hint, TImode);
    }
  recog_memoized (hint);
}

/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target. */
static rtx
get_branch_target (rtx branch)
{
  if (GET_CODE (branch) == JUMP_INSN)
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* jump table */
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
	return 0;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
	         try a branch hint.  */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (GET_CODE (branch) == CALL_INSN)
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL. */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}

static void
insert_branch_hints (void)
{
  struct spu_bb_info *spu_bb_info;
  rtx branch, insn, next;
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, head_addr;
  basic_block bb;
  unsigned int j;

  spu_bb_info =
    (struct spu_bb_info *) xcalloc (last_basic_block + 1,
				    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths.  */
  shorten_branches (get_insns ());

  FOR_EACH_BB_REVERSE (bb)
  {
    head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
    branch = 0;
    if (spu_bb_info[bb->index].prop_jump)
      {
	branch = spu_bb_info[bb->index].prop_jump;
	branch_target = get_branch_target (branch);
	branch_addr = INSN_ADDRESSES (INSN_UID (branch));
      }
    /* Search from end of a block to beginning.  In this loop, find
       jumps which need a branch hint and emit them only when:
       - it's an indirect branch and we're at the insn which sets
         the register
       - we're at an insn that will invalidate the hint. e.g., a
         call, another hint insn, inline asm that clobbers $hbr, and
         some inlined operations (divmodsi4).  Don't consider jumps
         because they are only at the end of a block and are
         considered when we are deciding whether to propagate
       - we're getting too far away from the branch.  The hbr insns
         only have a signed 10-bit offset
       We go back as far as possible so the branch will be considered
       for propagation when we get to the beginning of the block.  */
    next = 0;
    for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
      {
	if (INSN_P (insn))
	  {
	    insn_addr = INSN_ADDRESSES (INSN_UID (insn));
	    if (branch
		&& ((GET_CODE (branch_target) == REG
		     && set_of (branch_target, insn) != NULL_RTX)
		    || insn_clobbers_hbr (insn)
		    || branch_addr - insn_addr > 600))
	      {
		int next_addr = INSN_ADDRESSES (INSN_UID (next));
		if (insn != BB_END (bb)
		    && branch_addr - next_addr >= spu_hint_dist)
		  {
		    if (dump_file)
		      fprintf (dump_file,
			       "hint for %i in block %i before %i\n",
			       INSN_UID (branch), bb->index, INSN_UID (next));
		    spu_emit_branch_hint (next, branch, branch_target,
					  branch_addr - next_addr);
		  }
		branch = 0;
	      }

	    /* JUMP_P will only be true at the end of a block.  When
	       branch is already set it means we've previously decided
	       to propagate a hint for that branch into this block. */
	    if (CALL_P (insn) || (JUMP_P (insn) && !branch))
	      {
		branch = 0;
		if ((branch_target = get_branch_target (insn)))
		  {
		    branch = insn;
		    branch_addr = insn_addr;
		  }
	      }

	    /* When a branch hint is emitted it will be inserted
	       before "next".  Make sure next is the beginning of a
	       cycle to minimize impact on the scheduled insns. */
	    if (GET_MODE (insn) == TImode)
	      next = insn;
	  }
	if (insn == BB_HEAD (bb))
	  break;
      }

    if (branch)
      {
	/* If we haven't emitted a hint for this branch yet, it might
	   be profitable to emit it in one of the predecessor blocks,
	   especially for loops.  */
	rtx bbend;
	basic_block prev = 0, prop = 0, prev2 = 0;
	int loop_exit = 0, simple_loop = 0;
	int next_addr = 0;
	if (next)
	  next_addr = INSN_ADDRESSES (INSN_UID (next));

	for (j = 0; j < EDGE_COUNT (bb->preds); j++)
	  if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
	    prev = EDGE_PRED (bb, j)->src;
	  else
	    prev2 = EDGE_PRED (bb, j)->src;

	for (j = 0; j < EDGE_COUNT (bb->succs); j++)
	  if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
	    loop_exit = 1;
	  else if (EDGE_SUCC (bb, j)->dest == bb)
	    simple_loop = 1;

	/* If this branch is a loop exit then propagate to previous
	   fallthru block. This catches the cases when it is a simple
	   loop or when there is an initial branch into the loop. */
	if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
	  prop = prev;

	/* If there is only one adjacent predecessor, don't propagate
	   outside this loop.  This loop_depth test isn't perfect, but
	   I'm not sure the loop_father member is valid at this point.  */
	else if (prev && single_pred_p (bb)
		 && prev->loop_depth == bb->loop_depth)
	  prop = prev;

	/* If this is the JOIN block of a simple IF-THEN then
	   propagate the hint to the HEADER block. */
	else if (prev && prev2
		 && EDGE_COUNT (bb->preds) == 2
		 && EDGE_COUNT (prev->preds) == 1
		 && EDGE_PRED (prev, 0)->src == prev2
		 && prev2->loop_depth == bb->loop_depth
		 && GET_CODE (branch_target) != REG)
	  prop = prev;

	/* Don't propagate when:
	   - this is a simple loop and the hint would be too far
	   - this is not a simple loop and there are 16 insns in
	     this block already
	   - the predecessor block ends in a branch that will be
	     hinted
	   - the predecessor block ends in an insn that invalidates
	     the hint */
	if (prop
	    && prop->index >= 0
	    && (bbend = BB_END (prop))
	    && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
	    (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
	    && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
		       "for %i (loop_exit %i simple_loop %i dist %i)\n",
		       bb->index, prop->index, bb->loop_depth,
		       INSN_UID (branch), loop_exit, simple_loop,
		       branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

	    spu_bb_info[prop->index].prop_jump = branch;
	    spu_bb_info[prop->index].bb = bb;
	  }
	else if (next && branch_addr - next_addr >= spu_hint_dist)
	  {
	    if (dump_file)
	      fprintf (dump_file, "hint for %i in block %i before %i\n",
		       INSN_UID (branch), bb->index, INSN_UID (next));
	    spu_emit_branch_hint (next, branch, branch_target,
				  branch_addr - next_addr);
	  }
      }
  }
  free (spu_bb_info);
}

/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
   unrecognized insns.  */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
}

/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  */
static void
insert_nops (void)
{
  rtx insn, next_insn, prev_insn;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES. */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops. */
  length = 0;

  prev_insn = 0;
  for (insn = get_insns (); insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if (GET_MODE (insn) == TImode
	  && next_insn
	  && GET_MODE (next_insn) != TImode
	  && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned. */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}

static void
spu_machine_dependent_reorg (void)
{
  if (optimize > 0)
    {
      if (TARGET_BRANCH_HINTS)
	insert_branch_hints ();
      insert_nops ();
    }
}

/* Insn scheduling routines, primarily for dual issue. */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
			  int verbose ATTRIBUTE_UNUSED, rtx insn,
			  int can_issue_more)
{
  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER
      && get_pipe (insn) != -2)
    can_issue_more--;
  return can_issue_more;
}

static int
get_pipe (rtx insn)
{
  enum attr_type t;
  /* Handle inline asm */
  if (INSN_CODE (insn) == -1)
    return -1;
  t = get_attr_type (insn);
  switch (t)
    {
    case TYPE_CONVERT:
      return -2;
    case TYPE_MULTI0:
      return -1;

    case TYPE_FX2:
    case TYPE_FX3:
    case TYPE_SPR:
    case TYPE_NOP:
    case TYPE_FXB:
    case TYPE_FPD:
    case TYPE_FP6:
    case TYPE_FP7:
    case TYPE_IPREFETCH:
      return 0;

    case TYPE_LNOP:
    case TYPE_SHUF:
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_BR:
    case TYPE_MULTI1:
    case TYPE_HBR:
      return 1;

    default:
      abort ();
    }
}

static int
spu_sched_adjust_priority (rtx insn, int pri)
{
  int p = get_pipe (insn);
  /* Schedule UNSPEC_CONVERT's early so they have less effect on
     scheduling.  */
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER
      || p == -2)
    return pri + 100;
  /* Schedule pipe0 insns early for greedier dual issue. */
  if (p != 1)
    return pri + 50;
  return pri;
}

/* INSN is dependent on DEP_INSN. */
static int
spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  if (GET_CODE (insn) == CALL_INSN)
    return cost - 2;
  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier. */
  if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return insn_cost (dep_insn) - 3;
  return cost;
}

/* Create a CONST_DOUBLE from a string.  */
rtx
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}

int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
	  || GET_CODE (x) == HIGH);
}

static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;

  return SPU_NONE;
}

/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl. */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || c == IC_IL1s
	|| (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  return 0;
}
2521 /* Return true if the first SIZE bytes of arr is a constant that can be
2522 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2523 represent the size and offset of the instruction to use. */
2525 cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
2527 int cpat, run, i, start;
2531 for (i = 0; i < size && cpat; i++)
2539 else if (arr[i] == 2 && arr[i+1] == 3)
2541 else if (arr[i] == 0)
2543 while (arr[i+run] == run && i+run < 16)
2545 if (run != 4 && run != 8)
2550 if ((i & (run-1)) != 0)
2557 if (cpat && (run || size < 16))
2564 *pstart = start == -1 ? 16-run : start;
2570 /* OP is a CONSTANT_P. Determine what instructions can be used to load
2571 it into a register. MODE is only valid when OP is a CONST_INT. */
2572 static enum immediate_class
2573 classify_immediate (rtx op, enum machine_mode mode)
2576 unsigned char arr[16];
2577 int i, j, repeated, fsmbi, repeat;
2579 gcc_assert (CONSTANT_P (op));
2581 if (GET_MODE (op) != VOIDmode)
2582 mode = GET_MODE (op);
2584 /* A V4SI const_vector with all identical symbols is ok. */
2587 && GET_CODE (op) == CONST_VECTOR
2588 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2589 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2590 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2591 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2592 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2593 op = CONST_VECTOR_ELT (op, 0);
2595 switch (GET_CODE (op))
2599 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
2602 /* We can never know if the resulting address fits in 18 bits and can be
2603 loaded with ila. For now, assume the address will not overflow if
2604 the displacement is "small" (fits 'K' constraint). */
2605 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
2607 rtx sym = XEXP (XEXP (op, 0), 0);
2608 rtx cst = XEXP (XEXP (op, 0), 1);
2610 if (GET_CODE (sym) == SYMBOL_REF
2611 && GET_CODE (cst) == CONST_INT
2612 && satisfies_constraint_K (cst))
2621 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2622 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2623 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2629 constant_to_array (mode, op, arr);
2631 /* Check that each 4-byte slot is identical. */
2633 for (i = 4; i < 16; i += 4)
2634 for (j = 0; j < 4; j++)
2635 if (arr[j] != arr[i + j])
2640 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2641 val = trunc_int_for_mode (val, SImode);
2643 if (which_immediate_load (val) != SPU_NONE)
2647 /* Any mode of 2 bytes or smaller can be loaded with an il instruction. */
2649 gcc_assert (GET_MODE_SIZE (mode) > 2);
2653 for (i = 0; i < 16 && fsmbi; i++)
2654 if (arr[i] != 0 && repeat == 0)
2656 else if (arr[i] != 0 && arr[i] != repeat)
2659 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
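/* For illustration: a constant whose 16 bytes are each 0x00 or 0xff,
   e.g. { 0xff,0xff,0x00,0x00, repeated }, is IC_FSMBI; a single fsmbi
   expands a 16-bit mask (0xcccc for this layout, one bit per byte)
   into exactly those bytes. If the repeated nonzero byte is anything
   else, say 0x80, the class is IC_FSMBI2 and a second instruction is
   needed to turn the 0xff bytes into that value. */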
2661 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2674 static enum spu_immediate
2675 which_logical_immediate (HOST_WIDE_INT val)
2677 gcc_assert (val == trunc_int_for_mode (val, SImode));
2679 if (val >= -0x200 && val <= 0x1ff)
2681 if (val >= 0 && val <= 0xffff)
2683 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2685 val = trunc_int_for_mode (val, HImode);
2686 if (val >= -0x200 && val <= 0x1ff)
2688 if ((val & 0xff) == ((val >> 8) & 0xff))
2690 val = trunc_int_for_mode (val, QImode);
2691 if (val >= -0x200 && val <= 0x1ff)
2698 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or CONST_DOUBLEs. */
2701 const_vector_immediate_p (rtx x)
2704 gcc_assert (GET_CODE (x) == CONST_VECTOR);
2705 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2706 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2707 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2713 logical_immediate_p (rtx op, enum machine_mode mode)
2716 unsigned char arr[16];
2719 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2720 || GET_CODE (op) == CONST_VECTOR);
2722 if (GET_CODE (op) == CONST_VECTOR
2723 && !const_vector_immediate_p (op))
2726 if (GET_MODE (op) != VOIDmode)
2727 mode = GET_MODE (op);
2729 constant_to_array (mode, op, arr);
2731 /* Check that bytes are repeated. */
2732 for (i = 4; i < 16; i += 4)
2733 for (j = 0; j < 4; j++)
2734 if (arr[j] != arr[i + j])
2737 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2738 val = trunc_int_for_mode (val, SImode);
2740 i = which_logical_immediate (val);
2741 return i != SPU_NONE && i != SPU_IOHL;
2745 iohl_immediate_p (rtx op, enum machine_mode mode)
2748 unsigned char arr[16];
2751 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2752 || GET_CODE (op) == CONST_VECTOR);
2754 if (GET_CODE (op) == CONST_VECTOR
2755 && !const_vector_immediate_p (op))
2758 if (GET_MODE (op) != VOIDmode)
2759 mode = GET_MODE (op);
2761 constant_to_array (mode, op, arr);
2763 /* Check that bytes are repeated. */
2764 for (i = 4; i < 16; i += 4)
2765 for (j = 0; j < 4; j++)
2766 if (arr[j] != arr[i + j])
2769 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2770 val = trunc_int_for_mode (val, SImode);
2772 return val >= 0 && val <= 0xffff;
2776 arith_immediate_p (rtx op, enum machine_mode mode,
2777 HOST_WIDE_INT low, HOST_WIDE_INT high)
2780 unsigned char arr[16];
2783 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2784 || GET_CODE (op) == CONST_VECTOR);
2786 if (GET_CODE (op) == CONST_VECTOR
2787 && !const_vector_immediate_p (op))
2790 if (GET_MODE (op) != VOIDmode)
2791 mode = GET_MODE (op);
2793 constant_to_array (mode, op, arr);
2795 if (VECTOR_MODE_P (mode))
2796 mode = GET_MODE_INNER (mode);
2798 bytes = GET_MODE_SIZE (mode);
2799 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2801 /* Check that bytes are repeated. */
2802 for (i = bytes; i < 16; i += bytes)
2803 for (j = 0; j < bytes; j++)
2804 if (arr[j] != arr[i + j])
2808 for (j = 1; j < bytes; j++)
2809 val = (val << 8) | arr[j];
2811 val = trunc_int_for_mode (val, mode);
2813 return val >= low && val <= high;
2817 - any 32-bit constant (SImode, SFmode)
2818 - any constant that can be generated with fsmbi (any mode)
2819 - a 64-bit constant where the high and low bits are identical
2821 - a 128-bit constant where the four 32-bit words match. */
2823 spu_legitimate_constant_p (rtx x)
2825 if (GET_CODE (x) == HIGH)
2827 /* V4SI with all identical symbols is valid. */
2829 && GET_MODE (x) == V4SImode
2830 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2831 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2832 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
2833 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2834 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2835 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2837 if (GET_CODE (x) == CONST_VECTOR
2838 && !const_vector_immediate_p (x))
2843 /* Valid addresses are:
2844 - symbol_ref, label_ref, const
2846 - reg + const, where either reg or const is 16 byte aligned
2847 - reg + reg, alignment doesn't matter
2848 The alignment matters in the reg+const case because lqd and stqd
2849 ignore the 4 least significant bits of the const. (TODO: It might be
2850 preferable to allow any alignment and fix it up when splitting.) */
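/* Examples: for an arbitrary base register r, both r+32 (offset is a
   multiple of 16) and r+r2 are legitimate, but r+18 is rejected unless
   r is a frame, stack or virtual register that is known to stay
   16-byte aligned, in which case the splitter can rotate the loaded
   quadword by the low 4 bits of the offset. */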
2852 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2853 rtx x, int reg_ok_strict)
2855 if (mode == TImode && GET_CODE (x) == AND
2856 && GET_CODE (XEXP (x, 1)) == CONST_INT
2857 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2859 switch (GET_CODE (x))
2863 return !TARGET_LARGE_MEM;
2866 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
2868 rtx sym = XEXP (XEXP (x, 0), 0);
2869 rtx cst = XEXP (XEXP (x, 0), 1);
2871 /* Accept any symbol_ref + constant, assuming it does not
2872 wrap around the local store addressability limit. */
2873 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
2879 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2883 gcc_assert (GET_CODE (x) == REG);
2886 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2891 rtx op0 = XEXP (x, 0);
2892 rtx op1 = XEXP (x, 1);
2893 if (GET_CODE (op0) == SUBREG)
2894 op0 = XEXP (op0, 0);
2895 if (GET_CODE (op1) == SUBREG)
2896 op1 = XEXP (op1, 0);
2897 /* We can't just accept any aligned register because CSE can
2898 change it to a register that is not marked aligned and then
2899 recog will fail. So we only accept frame registers because
2900 they will only be changed to other frame registers. */
2901 if (GET_CODE (op0) == REG
2902 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2903 && GET_CODE (op1) == CONST_INT
2904 && INTVAL (op1) >= -0x2000
2905 && INTVAL (op1) <= 0x1fff
2906 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2908 if (GET_CODE (op0) == REG
2909 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2910 && GET_CODE (op1) == REG
2911 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2922 /* When the address is reg + const_int, force the const_int into a register. */
2925 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2926 enum machine_mode mode)
2929 /* Make sure both operands are registers. */
2930 if (GET_CODE (x) == PLUS)
2934 if (ALIGNED_SYMBOL_REF_P (op0))
2936 op0 = force_reg (Pmode, op0);
2937 mark_reg_pointer (op0, 128);
2939 else if (GET_CODE (op0) != REG)
2940 op0 = force_reg (Pmode, op0);
2941 if (ALIGNED_SYMBOL_REF_P (op1))
2943 op1 = force_reg (Pmode, op1);
2944 mark_reg_pointer (op1, 128);
2946 else if (GET_CODE (op1) != REG)
2947 op1 = force_reg (Pmode, op1);
2948 x = gen_rtx_PLUS (Pmode, op0, op1);
2949 if (spu_legitimate_address (mode, x, 0))
2955 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2956 struct attribute_spec.handler. */
2958 spu_handle_fndecl_attribute (tree * node,
2960 tree args ATTRIBUTE_UNUSED,
2961 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2963 if (TREE_CODE (*node) != FUNCTION_DECL)
2965 warning (0, "`%s' attribute only applies to functions",
2966 IDENTIFIER_POINTER (name));
2967 *no_add_attrs = true;
2973 /* Handle the "vector" attribute. */
2975 spu_handle_vector_attribute (tree * node, tree name,
2976 tree args ATTRIBUTE_UNUSED,
2977 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2979 tree type = *node, result = NULL_TREE;
2980 enum machine_mode mode;
2983 while (POINTER_TYPE_P (type)
2984 || TREE_CODE (type) == FUNCTION_TYPE
2985 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2986 type = TREE_TYPE (type);
2988 mode = TYPE_MODE (type);
2990 unsigned_p = TYPE_UNSIGNED (type);
2994 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2997 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3000 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3003 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3006 result = V4SF_type_node;
3009 result = V2DF_type_node;
3015 /* Propagate qualifiers attached to the element type
3016 onto the vector type. */
3017 if (result && result != type && TYPE_QUALS (type))
3018 result = build_qualified_type (result, TYPE_QUALS (type));
3020 *no_add_attrs = true; /* No need to hang on to the attribute. */
3023 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3025 *node = reconstruct_complex_type (*node, result);
3030 /* Return nonzero if FUNC is a naked function. */
3032 spu_naked_function_p (tree func)
3036 if (TREE_CODE (func) != FUNCTION_DECL)
3039 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3040 return a != NULL_TREE;
3044 spu_initial_elimination_offset (int from, int to)
3046 int saved_regs_size = spu_saved_regs_size ();
3048 if (!current_function_is_leaf || current_function_outgoing_args_size
3049 || get_frame_size () || saved_regs_size)
3050 sp_offset = STACK_POINTER_OFFSET;
3051 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3052 return (sp_offset + current_function_outgoing_args_size);
3053 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3055 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3056 return sp_offset + current_function_outgoing_args_size
3057 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3058 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3059 return get_frame_size () + saved_regs_size + sp_offset;
3064 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3066 enum machine_mode mode = TYPE_MODE (type);
3067 int byte_size = ((mode == BLKmode)
3068 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3070 /* Make sure small structs are left justified in a register. */
3071 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3072 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3074 enum machine_mode smode;
3077 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3078 int n = byte_size / UNITS_PER_WORD;
3079 v = rtvec_alloc (nregs);
3080 for (i = 0; i < n; i++)
3082 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3083 gen_rtx_REG (TImode,
3086 GEN_INT (UNITS_PER_WORD * i));
3087 byte_size -= UNITS_PER_WORD;
3095 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3097 gen_rtx_EXPR_LIST (VOIDmode,
3098 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3099 GEN_INT (UNITS_PER_WORD * n));
3101 return gen_rtx_PARALLEL (mode, v);
3103 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3107 spu_function_arg (CUMULATIVE_ARGS cum,
3108 enum machine_mode mode,
3109 tree type, int named ATTRIBUTE_UNUSED)
3113 if (cum >= MAX_REGISTER_ARGS)
3116 byte_size = ((mode == BLKmode)
3117 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3119 /* The ABI does not allow parameters to be passed partially in
3120 registers and partially on the stack. */
3121 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3124 /* Make sure small structs are left justified in a register. */
3125 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3126 && byte_size < UNITS_PER_WORD && byte_size > 0)
3128 enum machine_mode smode;
3132 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3133 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3134 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3136 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3139 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3142 /* Variable sized types are passed by reference. */
3144 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3145 enum machine_mode mode ATTRIBUTE_UNUSED,
3146 const_tree type, bool named ATTRIBUTE_UNUSED)
3148 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3154 /* Create and return the va_list datatype.
3156 On SPU, va_list is an array type equivalent to
3158 typedef struct __va_list_tag
3160 void *__args __attribute__((__aligned__(16)));
3161 void *__skip __attribute__((__aligned__(16)));
3165 where __args points to the arg that will be returned by the next
3166 va_arg(), and __skip points to the previous stack frame such that
3167 when __args == __skip we should advance __args by 32 bytes. */
3169 spu_build_builtin_va_list (void)
3171 tree f_args, f_skip, record, type_decl;
3174 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3177 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3179 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3180 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3182 DECL_FIELD_CONTEXT (f_args) = record;
3183 DECL_ALIGN (f_args) = 128;
3184 DECL_USER_ALIGN (f_args) = 1;
3186 DECL_FIELD_CONTEXT (f_skip) = record;
3187 DECL_ALIGN (f_skip) = 128;
3188 DECL_USER_ALIGN (f_skip) = 1;
3190 TREE_CHAIN (record) = type_decl;
3191 TYPE_NAME (record) = type_decl;
3192 TYPE_FIELDS (record) = f_args;
3193 TREE_CHAIN (f_args) = f_skip;
3195 /* We know this is being padded and we want it that way. It is an
3196 internal type, so hide the warnings from the user. */
3198 warn_padded = false;
3200 layout_type (record);
3204 /* The correct type is an array type of one element. */
3205 return build_array_type (record, build_index_type (size_zero_node));
3208 /* Implement va_start by filling the va_list structure VALIST.
3209 NEXTARG points to the first anonymous stack argument.
3211 The following global variables are used to initialize
3212 the va_list structure:
3214 current_function_args_info:
3215 the CUMULATIVE_ARGS for this function
3217 current_function_arg_offset_rtx:
3218 holds the offset of the first anonymous stack argument
3219 (relative to the virtual arg pointer). */
3222 spu_va_start (tree valist, rtx nextarg)
3224 tree f_args, f_skip;
3227 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3228 f_skip = TREE_CHAIN (f_args);
3230 valist = build_va_arg_indirect_ref (valist);
3232 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3234 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3236 /* Find the __args area. */
3237 t = make_tree (TREE_TYPE (args), nextarg);
3238 if (current_function_pretend_args_size > 0)
3239 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3240 size_int (-STACK_POINTER_OFFSET));
3241 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
3242 TREE_SIDE_EFFECTS (t) = 1;
3243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3245 /* Find the __skip area. */
3246 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3247 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3248 size_int (current_function_pretend_args_size
3249 - STACK_POINTER_OFFSET));
3250 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
3251 TREE_SIDE_EFFECTS (t) = 1;
3252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3255 /* Gimplify va_arg by updating the va_list structure
3256 VALIST as required to retrieve an argument of type
3257 TYPE, and returning that argument.
3259 ret = va_arg(VALIST, TYPE);
3261 generates code equivalent to:
3263 paddedsize = (sizeof(TYPE) + 15) & -16;
3264 if (VALIST.__args + paddedsize > VALIST.__skip
3265 && VALIST.__args <= VALIST.__skip)
3266 addr = VALIST.__skip + 32;
3268 addr = VALIST.__args;
3269 VALIST.__args = addr + paddedsize;
3270 ret = *(TYPE *)addr;
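   For example, for an int argument paddedsize is (4 + 15) & -16 = 16:
   each va_arg normally reads from __args and then advances __args by
   16, and the one read that would step over __skip is taken from
   __skip + 32 instead, hopping over the 32-byte gap between the
   register save area and the first stack argument.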
3273 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
3274 tree * post_p ATTRIBUTE_UNUSED)
3276 tree f_args, f_skip;
3278 HOST_WIDE_INT size, rsize;
3279 tree paddedsize, addr, tmp;
3280 bool pass_by_reference_p;
3282 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3283 f_skip = TREE_CHAIN (f_args);
3285 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3287 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3289 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3291 addr = create_tmp_var (ptr_type_node, "va_arg");
3292 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3294 /* if an object is dynamically sized, a pointer to it is passed
3295 instead of the object itself. */
3296 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3298 if (pass_by_reference_p)
3299 type = build_pointer_type (type);
3300 size = int_size_in_bytes (type);
3301 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3303 /* build conditional expression to calculate addr. The expression
3304 will be gimplified later. */
3305 paddedsize = size_int (rsize);
3306 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
3307 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3308 build2 (GT_EXPR, boolean_type_node, tmp, skip),
3309 build2 (LE_EXPR, boolean_type_node, args, skip));
3311 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3312 build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
3313 size_int (32)), args);
3315 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
3316 gimplify_and_add (tmp, pre_p);
3318 /* update VALIST.__args */
3319 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
3320 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
3321 gimplify_and_add (tmp, pre_p);
3323 addr = fold_convert (build_pointer_type (type), addr);
3325 if (pass_by_reference_p)
3326 addr = build_va_arg_indirect_ref (addr);
3328 return build_va_arg_indirect_ref (addr);
3331 /* Save parameter registers starting with the register that corresponds
3332 to the first unnamed parameter. If the first unnamed parameter is
3333 in the stack then save no registers. Set pretend_args_size to the
3334 amount of space needed to save the registers. */
3336 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3337 tree type, int *pretend_size, int no_rtl)
3346 /* cum currently points to the last named argument; we want to
3347 start at the next argument. */
3348 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3350 offset = -STACK_POINTER_OFFSET;
3351 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3353 tmp = gen_frame_mem (V4SImode,
3354 plus_constant (virtual_incoming_args_rtx,
3356 emit_move_insn (tmp,
3357 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3360 *pretend_size = offset + STACK_POINTER_OFFSET;
3365 spu_conditional_register_usage (void)
3369 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3370 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3374 /* This is called to decide when we can simplify a load instruction. We
3375 must only return true for registers which we know will always be
3376 aligned, taking into account that CSE might replace this reg with
3377 another one that has not been marked aligned.
3378 So this is really only true for frame, stack and virtual registers,
3379 which we know are always aligned and should not be adversely affected by CSE. */
3382 regno_aligned_for_load (int regno)
3384 return regno == FRAME_POINTER_REGNUM
3385 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
3386 || regno == STACK_POINTER_REGNUM
3387 || (regno >= FIRST_VIRTUAL_REGISTER
3388 && regno <= LAST_VIRTUAL_REGISTER);
3391 /* Return TRUE when mem is known to be 16-byte aligned. */
3393 aligned_mem_p (rtx mem)
3395 if (MEM_ALIGN (mem) >= 128)
3397 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3399 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3401 rtx p0 = XEXP (XEXP (mem, 0), 0);
3402 rtx p1 = XEXP (XEXP (mem, 0), 1);
3403 if (regno_aligned_for_load (REGNO (p0)))
3405 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3407 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3411 else if (GET_CODE (XEXP (mem, 0)) == REG)
3413 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3416 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3418 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3420 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3421 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3422 if (GET_CODE (p0) == SYMBOL_REF
3423 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3429 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3430 into its SYMBOL_REF_FLAGS. */
3432 spu_encode_section_info (tree decl, rtx rtl, int first)
3434 default_encode_section_info (decl, rtl, first);
3436 /* If a variable has a forced alignment to < 16 bytes, mark it with
3437 SYMBOL_FLAG_ALIGN1. */
3438 if (TREE_CODE (decl) == VAR_DECL
3439 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3440 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3443 /* Return TRUE if we are certain the mem refers to a complete object
3444 which is both 16-byte aligned and padded to a 16-byte boundary. This
3445 would make it safe to store with a single instruction.
3446 We guarantee the alignment and padding for static objects by aligning
3447 all of them to 16 bytes (DATA_ALIGNMENT and CONSTANT_ALIGNMENT).
3448 FIXME: We currently cannot guarantee this for objects on the stack
3449 because assign_parm_setup_stack calls assign_stack_local with the
3450 alignment of the parameter mode and in that case the alignment never
3451 gets adjusted by LOCAL_ALIGNMENT. */
3453 store_with_one_insn_p (rtx mem)
3455 rtx addr = XEXP (mem, 0);
3456 if (GET_MODE (mem) == BLKmode)
3458 /* Only static objects. */
3459 if (GET_CODE (addr) == SYMBOL_REF)
3461 /* We use the associated declaration to make sure the access is
3462 referring to the whole object.
3463 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3464 if it is necessary. Will there be cases where one exists, and
3465 the other does not? Will there be cases where both exist, but
3466 have different types? */
3467 tree decl = MEM_EXPR (mem);
3469 && TREE_CODE (decl) == VAR_DECL
3470 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3472 decl = SYMBOL_REF_DECL (addr);
3474 && TREE_CODE (decl) == VAR_DECL
3475 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3482 spu_expand_mov (rtx * ops, enum machine_mode mode)
3484 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3487 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3489 rtx from = SUBREG_REG (ops[1]);
3490 enum machine_mode imode = GET_MODE (from);
3492 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3493 && GET_MODE_CLASS (imode) == MODE_INT
3494 && subreg_lowpart_p (ops[1]));
3496 if (GET_MODE_SIZE (imode) < 4)
3498 from = gen_rtx_SUBREG (SImode, from, 0);
3502 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3504 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
3505 emit_insn (GEN_FCN (icode) (ops[0], from));
3508 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3512 /* At least one of the operands needs to be a register. */
3513 if ((reload_in_progress | reload_completed) == 0
3514 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3516 rtx temp = force_reg (mode, ops[1]);
3517 emit_move_insn (ops[0], temp);
3520 if (reload_in_progress || reload_completed)
3522 if (CONSTANT_P (ops[1]))
3523 return spu_split_immediate (ops);
3528 if (GET_CODE (ops[0]) == MEM)
3530 if (!spu_valid_move (ops))
3532 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3533 gen_reg_rtx (TImode)));
3537 else if (GET_CODE (ops[1]) == MEM)
3539 if (!spu_valid_move (ops))
3542 (ops[0], ops[1], gen_reg_rtx (TImode),
3543 gen_reg_rtx (SImode)));
3547 /* Catch the SImode immediates greater than 0x7fffffff, and sign extend them. */
3549 if (GET_CODE (ops[1]) == CONST_INT)
3551 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3552 if (val != INTVAL (ops[1]))
3554 emit_move_insn (ops[0], GEN_INT (val));
3565 /* For now, only frame registers are known to be aligned at all times.
3566 We can't trust REGNO_POINTER_ALIGN because optimization will move
3567 registers around, potentially changing an "aligned" register in an
3568 address to an unaligned register, which would result in an invalid address. */
3570 int regno = REGNO (reg);
3571 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3575 spu_split_load (rtx * ops)
3577 enum machine_mode mode = GET_MODE (ops[0]);
3578 rtx addr, load, rot, mem, p0, p1;
3581 addr = XEXP (ops[1], 0);
3585 if (GET_CODE (addr) == PLUS)
3588 aligned reg + aligned reg => lqx
3589 aligned reg + unaligned reg => lqx, rotqby
3590 aligned reg + aligned const => lqd
3591 aligned reg + unaligned const => lqd, rotqbyi
3592 unaligned reg + aligned reg => lqx, rotqby
3593 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3594 unaligned reg + aligned const => lqd, rotqby
3595 unaligned reg + unaligned const -> not allowed by legitimate address
3597 p0 = XEXP (addr, 0);
3598 p1 = XEXP (addr, 1);
3599 if (reg_align (p0) < 128)
3601 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3603 emit_insn (gen_addsi3 (ops[3], p0, p1));
3611 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3613 rot_amt = INTVAL (p1) & 15;
3614 p1 = GEN_INT (INTVAL (p1) & -16);
3615 addr = gen_rtx_PLUS (SImode, p0, p1);
3617 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3621 else if (GET_CODE (addr) == REG)
3623 if (reg_align (addr) < 128)
3626 else if (GET_CODE (addr) == CONST)
3628 if (GET_CODE (XEXP (addr, 0)) == PLUS
3629 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3630 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3632 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3634 addr = gen_rtx_CONST (Pmode,
3635 gen_rtx_PLUS (Pmode,
3636 XEXP (XEXP (addr, 0), 0),
3637 GEN_INT (rot_amt & -16)));
3639 addr = XEXP (XEXP (addr, 0), 0);
3644 else if (GET_CODE (addr) == CONST_INT)
3646 rot_amt = INTVAL (addr);
3647 addr = GEN_INT (rot_amt & -16);
3649 else if (!ALIGNED_SYMBOL_REF_P (addr))
3652 if (GET_MODE_SIZE (mode) < 4)
3653 rot_amt += GET_MODE_SIZE (mode) - 4;
3659 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3666 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3667 mem = change_address (ops[1], TImode, addr);
3669 emit_insn (gen_movti (load, mem));
3672 emit_insn (gen_rotqby_ti (load, load, rot));
3674 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3676 if (reload_completed)
3677 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3679 emit_insn (gen_spu_convert (ops[0], load));
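/* Illustrative expansion (register names invented): an SImode load
   from aligned base fp with offset 2 becomes roughly

       lqd     $t, 0($fp)       # fetch the enclosing quadword
       rotqbyi $t, $t, 2        # rotate the wanted word into slot 0

   i.e. the "aligned reg + unaligned const => lqd, rotqbyi" case from
   the table above. */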
3683 spu_split_store (rtx * ops)
3685 enum machine_mode mode = GET_MODE (ops[0]);
3688 rtx addr, p0, p1, p1_lo, smem;
3692 addr = XEXP (ops[0], 0);
3694 if (GET_CODE (addr) == PLUS)
3697 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3698 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3699 aligned reg + aligned const => lqd, c?d, shuf, stqx
3700 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3701 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3702 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3703 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3704 unaligned reg + unaligned const -> not allowed by legitimate address
3707 p0 = XEXP (addr, 0);
3708 p1 = p1_lo = XEXP (addr, 1);
3709 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3711 p1_lo = GEN_INT (INTVAL (p1) & 15);
3712 p1 = GEN_INT (INTVAL (p1) & -16);
3713 addr = gen_rtx_PLUS (SImode, p0, p1);
3716 else if (GET_CODE (addr) == REG)
3720 p1 = p1_lo = const0_rtx;
3725 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3726 p1 = 0; /* aform doesn't use p1 */
3728 if (ALIGNED_SYMBOL_REF_P (addr))
3730 else if (GET_CODE (addr) == CONST)
3732 if (GET_CODE (XEXP (addr, 0)) == PLUS
3733 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3734 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3736 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3738 addr = gen_rtx_CONST (Pmode,
3739 gen_rtx_PLUS (Pmode,
3740 XEXP (XEXP (addr, 0), 0),
3741 GEN_INT (v & -16)));
3743 addr = XEXP (XEXP (addr, 0), 0);
3744 p1_lo = GEN_INT (v & 15);
3747 else if (GET_CODE (addr) == CONST_INT)
3749 p1_lo = GEN_INT (INTVAL (addr) & 15);
3750 addr = GEN_INT (INTVAL (addr) & -16);
3754 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3756 scalar = store_with_one_insn_p (ops[0]);
3759 /* We could copy the flags from the ops[0] MEM to mem here, but
3760 we don't because we want this load to be optimized away if
3761 possible, and copying the flags will prevent that in certain
3762 cases, e.g. consider the volatile flag. */
3764 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3765 set_mem_alias_set (lmem, 0);
3766 emit_insn (gen_movti (reg, lmem));
3768 if (!p0 || reg_align (p0) >= 128)
3769 p0 = stack_pointer_rtx;
3773 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3774 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3776 else if (reload_completed)
3778 if (GET_CODE (ops[1]) == REG)
3779 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3780 else if (GET_CODE (ops[1]) == SUBREG)
3781 emit_move_insn (reg,
3782 gen_rtx_REG (GET_MODE (reg),
3783 REGNO (SUBREG_REG (ops[1]))));
3789 if (GET_CODE (ops[1]) == REG)
3790 emit_insn (gen_spu_convert (reg, ops[1]));
3791 else if (GET_CODE (ops[1]) == SUBREG)
3792 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3797 if (GET_MODE_SIZE (mode) < 4 && scalar)
3798 emit_insn (gen_shlqby_ti
3799 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3801 smem = change_address (ops[0], TImode, addr);
3802 /* We can't use the previous alias set because the memory has changed
3803 size and can potentially overlap objects of other types. */
3804 set_mem_alias_set (smem, 0);
3806 emit_insn (gen_movti (smem, reg));
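/* Illustrative expansion (register names invented): storing an SImode
   value $v at aligned base fp with offset 4 becomes a quadword
   read-modify-write, roughly

       lqd   $t, 0($fp)         # old quadword
       cwd   $p, 4($fp)         # insertion controls for a word
       shufb $t, $v, $t, $p     # splice the new word in
       stqd  $t, 0($fp)         # write the quadword back

   matching the "c?d, shuf" rows of the table above. */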
3809 /* Return TRUE if X is MEM which is a struct member reference
3810 and the member can safely be loaded and stored with a single
3811 instruction because it is padded. */
3813 mem_is_padded_component_ref (rtx x)
3815 tree t = MEM_EXPR (x);
3817 if (!t || TREE_CODE (t) != COMPONENT_REF)
3819 t = TREE_OPERAND (t, 1);
3820 if (!t || TREE_CODE (t) != FIELD_DECL
3821 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3823 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3824 r = DECL_FIELD_CONTEXT (t);
3825 if (!r || TREE_CODE (r) != RECORD_TYPE)
3827 /* Make sure they are the same mode */
3828 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3830 /* If there are no following fields then the field alignment assures
3831 the structure is padded to the alignment, which means this field is padded too. */
3833 if (TREE_CHAIN (t) == 0)
3835 /* If the following field is also aligned then this field will be naturally padded. */
3838 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3843 /* Parse the -mfixed-range= option string. */
3845 fix_range (const char *const_str)
3848 char *str, *dash, *comma;
3850 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3851 REG2 are either register names or register numbers. The effect
3852 of this option is to mark the registers in the range from REG1 to
3853 REG2 as ``fixed'' so they won't be used by the compiler. */
3855 i = strlen (const_str);
3856 str = (char *) alloca (i + 1);
3857 memcpy (str, const_str, i + 1);
3861 dash = strchr (str, '-');
3864 warning (0, "value of -mfixed-range must have form REG1-REG2");
3868 comma = strchr (dash + 1, ',');
3872 first = decode_reg_name (str);
3875 warning (0, "unknown register name: %s", str);
3879 last = decode_reg_name (dash + 1);
3882 warning (0, "unknown register name: %s", dash + 1);
3890 warning (0, "%s-%s is an empty range", str, dash + 1);
3894 for (i = first; i <= last; ++i)
3895 fixed_regs[i] = call_used_regs[i] = 1;
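/* Example: -mfixed-range=80-127 marks registers 80 through 127 as
   fixed and call-used so the allocator never uses them; several
   comma-separated ranges may be given. */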
3906 spu_valid_move (rtx * ops)
3908 enum machine_mode mode = GET_MODE (ops[0]);
3909 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3912 /* init_expr_once tries to recog against load and store insns to set
3913 the direct_load[] and direct_store[] arrays. We always want to
3914 consider those loads and stores valid. init_expr_once is called in
3915 the context of a dummy function which does not have a decl. */
3916 if (cfun->decl == 0)
3919 /* Don't allow loads/stores which would require more than 1 insn.
3920 During and after reload we assume loads and stores only take 1 insn. */
3922 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3924 if (GET_CODE (ops[0]) == MEM
3925 && (GET_MODE_SIZE (mode) < 4
3926 || !(store_with_one_insn_p (ops[0])
3927 || mem_is_padded_component_ref (ops[0]))))
3929 if (GET_CODE (ops[1]) == MEM
3930 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3936 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3937 can be generated using the fsmbi instruction. */
3939 fsmbi_const_p (rtx x)
3943 /* We can always choose TImode for CONST_INT because the high bits
3944 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3945 enum immediate_class c = classify_immediate (x, TImode);
3946 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
3951 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3952 can be generated using the cbd, chd, cwd or cdd instruction. */
3954 cpat_const_p (rtx x, enum machine_mode mode)
3958 enum immediate_class c = classify_immediate (x, mode);
3959 return c == IC_CPAT;
3965 gen_cpat_const (rtx * ops)
3967 unsigned char dst[16];
3968 int i, offset, shift, isize;
3969 if (GET_CODE (ops[3]) != CONST_INT
3970 || GET_CODE (ops[2]) != CONST_INT
3971 || (GET_CODE (ops[1]) != CONST_INT
3972 && GET_CODE (ops[1]) != REG))
3974 if (GET_CODE (ops[1]) == REG
3975 && (!REG_POINTER (ops[1])
3976 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3979 for (i = 0; i < 16; i++)
dst[i] = i + 16;
3981 isize = INTVAL (ops[3]);
3984 else if (isize == 2)
3988 offset = (INTVAL (ops[2]) +
3989 (GET_CODE (ops[1]) ==
3990 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3991 for (i = 0; i < isize; i++)
3992 dst[offset + i] = i + shift;
3993 return array_to_constant (TImode, dst);
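/* Worked example: a word insert (isize 4, shift 0) at offset 4 yields
   { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,0x19, ... ,0x1f }.
   The shufb codes 0x10-0x1f (the i + 16 initialization) pass the old
   quadword through unchanged, while the four 0x00-0x03 bytes select
   the inserted word, the same pattern a run-time cwd at that offset
   would produce. */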
3996 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3997 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3998 than 16 bytes, the value is repeated across the rest of the array. */
4000 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4005 memset (arr, 0, 16);
4006 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4007 if (GET_CODE (x) == CONST_INT
4008 || (GET_CODE (x) == CONST_DOUBLE
4009 && (mode == SFmode || mode == DFmode)))
4011 gcc_assert (mode != VOIDmode && mode != BLKmode);
4013 if (GET_CODE (x) == CONST_DOUBLE)
4014 val = const_double_to_hwint (x);
4017 first = GET_MODE_SIZE (mode) - 1;
4018 for (i = first; i >= 0; i--)
4020 arr[i] = val & 0xff;
4023 /* Splat the constant across the whole array. */
4024 for (j = 0, i = first + 1; i < 16; i++)
4027 j = (j == first) ? 0 : j + 1;
4030 else if (GET_CODE (x) == CONST_DOUBLE)
4032 val = CONST_DOUBLE_LOW (x);
4033 for (i = 15; i >= 8; i--)
4035 arr[i] = val & 0xff;
4038 val = CONST_DOUBLE_HIGH (x);
4039 for (i = 7; i >= 0; i--)
4041 arr[i] = val & 0xff;
4045 else if (GET_CODE (x) == CONST_VECTOR)
4049 mode = GET_MODE_INNER (mode);
4050 units = CONST_VECTOR_NUNITS (x);
4051 for (i = 0; i < units; i++)
4053 elt = CONST_VECTOR_ELT (x, i);
4054 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4056 if (GET_CODE (elt) == CONST_DOUBLE)
4057 val = const_double_to_hwint (elt);
4060 first = GET_MODE_SIZE (mode) - 1;
4061 if (first + i * GET_MODE_SIZE (mode) > 16)
4063 for (j = first; j >= 0; j--)
4065 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4075 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4076 smaller than 16 bytes, use the bytes that would represent that value
4077 in a register, e.g., for QImode return the value of arr[3]. */
4079 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4081 enum machine_mode inner_mode;
4083 int units, size, i, j, k;
4086 if (GET_MODE_CLASS (mode) == MODE_INT
4087 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4089 j = GET_MODE_SIZE (mode);
4090 i = j < 4 ? 4 - j : 0;
4091 for (val = 0; i < j; i++)
4092 val = (val << 8) | arr[i];
4093 val = trunc_int_for_mode (val, mode);
4094 return GEN_INT (val);
4100 for (i = high = 0; i < 8; i++)
4101 high = (high << 8) | arr[i];
4102 for (i = 8, val = 0; i < 16; i++)
4103 val = (val << 8) | arr[i];
4104 return immed_double_const (val, high, TImode);
4108 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4109 val = trunc_int_for_mode (val, SImode);
4110 return hwint_to_const_double (SFmode, val);
4114 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4116 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
4117 return hwint_to_const_double (DFmode, val);
4120 if (!VECTOR_MODE_P (mode))
4123 units = GET_MODE_NUNITS (mode);
4124 size = GET_MODE_UNIT_SIZE (mode);
4125 inner_mode = GET_MODE_INNER (mode);
4126 v = rtvec_alloc (units);
4128 for (k = i = 0; i < units; ++i)
4131 for (j = 0; j < size; j++, k++)
4132 val = (val << 8) | arr[k];
4134 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4135 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4137 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4142 return gen_rtx_CONST_VECTOR (mode, v);
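/* Round-trip example: constant_to_array turns the SImode constant
   0x12345678 into arr = { 0x12,0x34,0x56,0x78, repeated across all 16
   bytes }, and array_to_constant (SImode, arr) reads arr[0..3] back
   into GEN_INT (0x12345678). */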
4146 reloc_diagnostic (rtx x)
4148 tree loc_decl, decl = 0;
4150 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4153 if (GET_CODE (x) == SYMBOL_REF)
4154 decl = SYMBOL_REF_DECL (x);
4155 else if (GET_CODE (x) == CONST
4156 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4157 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4159 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4160 if (decl && !DECL_P (decl))
4163 /* We use last_assemble_variable_decl to get line information. It's
4164 not always going to be right and might not even be close, but will
4165 be right for the more common cases. */
4166 if (!last_assemble_variable_decl || in_section == ctors_section)
4169 loc_decl = last_assemble_variable_decl;
4171 /* The decl could be a string constant. */
4172 if (decl && DECL_P (decl))
4173 msg = "%Jcreating run-time relocation for %qD";
4175 msg = "creating run-time relocation";
4177 if (TARGET_WARN_RELOC)
4178 warning (0, msg, loc_decl, decl);
4180 error (msg, loc_decl, decl);
4183 /* Hook into assemble_integer so we can generate an error for run-time
4184 relocations. The SPU ABI disallows them. */
4186 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4188 /* By default run-time relocations aren't supported, but we allow them
4189 in case users support them in their own run-time loader, and we provide
4190 a warning for those users that don't. */
4191 if ((GET_CODE (x) == SYMBOL_REF)
4192 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4193 reloc_diagnostic (x);
4195 return default_assemble_integer (x, size, aligned_p);
4199 spu_asm_globalize_label (FILE * file, const char *name)
4201 fputs ("\t.global\t", file);
4202 assemble_name (file, name);
4207 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
4209 enum machine_mode mode = GET_MODE (x);
4210 int cost = COSTS_N_INSNS (2);
4212 /* Folding to a CONST_VECTOR will use extra space but there might
4213 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4214 only if it allows us to fold away multiple insns. Changing the cost
4215 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4216 because this cost will only be compared against a single insn.
4217 if (code == CONST_VECTOR)
4218 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4221 /* Use defaults for float operations. Not accurate but good enough. */
4224 *total = COSTS_N_INSNS (13);
4229 *total = COSTS_N_INSNS (6);
4235 if (satisfies_constraint_K (x))
4237 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4238 *total = COSTS_N_INSNS (1);
4240 *total = COSTS_N_INSNS (3);
4244 *total = COSTS_N_INSNS (3);
4249 *total = COSTS_N_INSNS (0);
4253 *total = COSTS_N_INSNS (5);
4257 case FLOAT_TRUNCATE:
4259 case UNSIGNED_FLOAT:
4262 *total = COSTS_N_INSNS (7);
4268 *total = COSTS_N_INSNS (9);
4275 GET_CODE (XEXP (x, 0)) ==
4276 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4277 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4279 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4281 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4282 cost = COSTS_N_INSNS (14);
4283 if ((val & 0xffff) == 0)
4284 cost = COSTS_N_INSNS (9);
4285 else if (val > 0 && val < 0x10000)
4286 cost = COSTS_N_INSNS (11);
4295 *total = COSTS_N_INSNS (20);
4302 *total = COSTS_N_INSNS (4);
4305 if (XINT (x, 1) == UNSPEC_CONVERT)
4306 *total = COSTS_N_INSNS (0);
4308 *total = COSTS_N_INSNS (4);
4311 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4312 if (GET_MODE_CLASS (mode) == MODE_INT
4313 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4314 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4315 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4321 spu_eh_return_filter_mode (void)
4323 /* We would like this to be SImode, but sjlj exceptions seem to work
4324 only with word_mode. */
4328 /* Decide whether we can make a sibling call to a function. DECL is the
4329 declaration of the function being targeted by the call and EXP is the
4330 CALL_EXPR representing the call. */
4332 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4334 return decl && !TARGET_LARGE_MEM;
4337 /* We need to correctly update the back chain pointer and the Available
4338 Stack Size (which is in the second slot of the sp register). */
4340 spu_allocate_stack (rtx op0, rtx op1)
4343 rtx chain = gen_reg_rtx (V4SImode);
4344 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4345 rtx sp = gen_reg_rtx (V4SImode);
4346 rtx splatted = gen_reg_rtx (V4SImode);
4347 rtx pat = gen_reg_rtx (TImode);
4349 /* copy the back chain so we can save it back again. */
4350 emit_move_insn (chain, stack_bot);
4352 op1 = force_reg (SImode, op1);
4354 v = 0x1020300010203ll;
4355 emit_move_insn (pat, immed_double_const (v, v, TImode));
4356 emit_insn (gen_shufb (splatted, op1, op1, pat));
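  /* The 0x00010203... pattern above replicates word 0 of op1 (the
     requested size) into all four slots of 'splatted', so the single
     v4si subtraction below adjusts the stack pointer in slot 0 and the
     Available Stack Size in slot 1 at once. */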
4358 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4359 emit_insn (gen_subv4si3 (sp, sp, splatted));
4361 if (flag_stack_check)
4363 rtx avail = gen_reg_rtx (SImode);
4364 rtx result = gen_reg_rtx (SImode);
4365 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4366 emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
4367 emit_insn (gen_spu_heq (result, GEN_INT (0)));
4370 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4372 emit_move_insn (stack_bot, chain);
4374 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4378 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4380 static unsigned char arr[16] =
4381 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4382 rtx temp = gen_reg_rtx (SImode);
4383 rtx temp2 = gen_reg_rtx (SImode);
4384 rtx temp3 = gen_reg_rtx (V4SImode);
4385 rtx temp4 = gen_reg_rtx (V4SImode);
4386 rtx pat = gen_reg_rtx (TImode);
4387 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4389 /* Restore the backchain from the first word, sp from the second. */
4390 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4391 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4393 emit_move_insn (pat, array_to_constant (TImode, arr));
4395 /* Compute Available Stack Size for sp */
4396 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4397 emit_insn (gen_shufb (temp3, temp, temp, pat));
4399 /* Compute Available Stack Size for back chain */
4400 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4401 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4402 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4404 emit_insn (gen_addv4si3 (sp, sp, temp3));
4405 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4409 spu_init_libfuncs (void)
4411 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4412 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4413 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4414 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4415 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4416 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4417 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4418 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4419 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4420 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4421 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4423 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4424 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
4427 /* Make a subreg, stripping any existing subreg. We could possibly just
4428 call simplify_subreg, but in this case we know what we want. */
4430 spu_gen_subreg (enum machine_mode mode, rtx x)
4432 if (GET_CODE (x) == SUBREG)
4434 if (GET_MODE (x) == mode)
4436 return gen_rtx_SUBREG (mode, x, 0);
4440 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4442 return (TYPE_MODE (type) == BLKmode
4444 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4445 || int_size_in_bytes (type) >
4446 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4449 /* Create the built-in types and functions */
4451 struct spu_builtin_description spu_builtins[] = {
4452 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4453 {fcode, icode, name, type, params, NULL_TREE},
4454 #include "spu-builtins.def"
4459 spu_init_builtins (void)
4461 struct spu_builtin_description *d;
4464 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4465 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4466 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4467 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4468 V4SF_type_node = build_vector_type (float_type_node, 4);
4469 V2DF_type_node = build_vector_type (double_type_node, 2);
4471 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4472 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4473 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4474 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4476 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
4478 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4479 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4480 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4481 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4482 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4483 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4484 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4485 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4486 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4487 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4488 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4489 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4491 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4492 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4493 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4494 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4495 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4496 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4497 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4498 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4500 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4501 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4503 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4505 spu_builtin_types[SPU_BTI_PTR] =
4506 build_pointer_type (build_qualified_type
4508 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4510 /* For each builtin we build a new prototype. The tree code will make
4511 sure nodes are shared. */
4512 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4515 char name[64]; /* add_builtin_function will make a copy. */
4521 /* find last parm */
4522 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4528 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4530 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4532 sprintf (name, "__builtin_%s", d->name);
4534 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4536 if (d->fcode == SPU_MASK_FOR_LOAD)
4537 TREE_READONLY (d->fndecl) = 1;
4542 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4544 static unsigned char arr[16] =
4545 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4547 rtx temp = gen_reg_rtx (Pmode);
4548 rtx temp2 = gen_reg_rtx (V4SImode);
4549 rtx temp3 = gen_reg_rtx (V4SImode);
4550 rtx pat = gen_reg_rtx (TImode);
4551 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4553 emit_move_insn (pat, array_to_constant (TImode, arr));
4555 /* Restore the sp. */
4556 emit_move_insn (temp, op1);
4557 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4559 /* Compute available stack size for sp. */
4560 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4561 emit_insn (gen_shufb (temp3, temp, temp, pat));
4563 emit_insn (gen_addv4si3 (sp, sp, temp3));
4564 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
4568 spu_safe_dma (HOST_WIDE_INT channel)
4570 return (channel >= 21 && channel <= 27);
4574 spu_builtin_splats (rtx ops[])
4576 enum machine_mode mode = GET_MODE (ops[0]);
4577 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4579 unsigned char arr[16];
4580 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4581 emit_move_insn (ops[0], array_to_constant (mode, arr));
4583 else if (!flag_pic && GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4585 rtvec v = rtvec_alloc (4);
4586 RTVEC_ELT (v, 0) = ops[1];
4587 RTVEC_ELT (v, 1) = ops[1];
4588 RTVEC_ELT (v, 2) = ops[1];
4589 RTVEC_ELT (v, 3) = ops[1];
4590 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4594 rtx reg = gen_reg_rtx (TImode);
4596 if (GET_CODE (ops[1]) != REG
4597 && GET_CODE (ops[1]) != SUBREG)
4598 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
4604 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4610 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4615 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4620 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4626 emit_move_insn (reg, shuf);
4627 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
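/* Each shuffle constant above replicates the preferred slot of ops[1]
   across the result: bytes 0-7 twice for a doubleword splat, bytes 0-3
   four times for a word splat, bytes 2-3 eight times for a halfword
   splat, and byte 3 sixteen times for a QImode splat. */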
4632 spu_builtin_extract (rtx ops[])
4634 enum machine_mode mode;
4637 mode = GET_MODE (ops[1]);
4639 if (GET_CODE (ops[2]) == CONST_INT)
4644 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4647 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4650 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4653 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4656 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4659 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4667 from = spu_gen_subreg (TImode, ops[1]);
4668 rot = gen_reg_rtx (TImode);
4669 tmp = gen_reg_rtx (SImode);
4674 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4677 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4678 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4682 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4686 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4691 emit_insn (gen_rotqby_ti (rot, from, tmp));
4693 emit_insn (gen_spu_convert (ops[0], rot));
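/* The rotate amount computed above moves element N into the preferred
   slot: e.g. for V4SI, tmp = N << 2 rotates the quadword left by 4*N
   bytes, so element 2 lands in bytes 0-3 where spu_convert picks it
   up. */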
4697 spu_builtin_insert (rtx ops[])
4699 enum machine_mode mode = GET_MODE (ops[0]);
4700 enum machine_mode imode = GET_MODE_INNER (mode);
4701 rtx mask = gen_reg_rtx (TImode);
4704 if (GET_CODE (ops[3]) == CONST_INT)
4705 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4708 offset = gen_reg_rtx (SImode);
4709 emit_insn (gen_mulsi3
4710 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4713 (mask, stack_pointer_rtx, offset,
4714 GEN_INT (GET_MODE_SIZE (imode))));
4715 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4719 spu_builtin_promote (rtx ops[])
4721 enum machine_mode mode, imode;
4722 rtx rot, from, offset;
4725 mode = GET_MODE (ops[0]);
4726 imode = GET_MODE_INNER (mode);
4728 from = gen_reg_rtx (TImode);
4729 rot = spu_gen_subreg (TImode, ops[0]);
4731 emit_insn (gen_spu_convert (from, ops[1]));
4733 if (GET_CODE (ops[2]) == CONST_INT)
4735 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4736 if (GET_MODE_SIZE (imode) < 4)
4737 pos += 4 - GET_MODE_SIZE (imode);
4738 offset = GEN_INT (pos & 15);
4742 offset = gen_reg_rtx (SImode);
4746 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4749 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4750 emit_insn (gen_addsi3 (offset, offset, offset));
4754 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4755 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4759 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4765 emit_insn (gen_rotqby_ti (rot, from, offset));
void
spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      unsigned char shufa[16] = {
	2, 3, 0, 1, 18, 19, 16, 17,
	0, 1, 2, 3, 16, 17, 18, 19
      };
      unsigned char insna[16] = {
	0x41, 0, 0, 79,
	0x41, 0, 0, STATIC_CHAIN_REGNUM,
	0x60, 0x80, 0, 79,
	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = memory_address (Pmode, plus_constant (tramp, 16));
      emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      unsigned char insna[16] = {
	0x42, 0, 0, STATIC_CHAIN_REGNUM,
	0x30, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
	 fits 18 bits and the last 4 are zeros.  This will be true if
	 the stack pointer is initialized to 0x3fff0 at program start,
	 otherwise the ila instruction will be garbage.  */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
    }
  emit_insn (gen_sync ());
}
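
/* Editor's annotation (a hedged reading of the code above): in the
   small-memory case the stored trampoline appears to be

       ila  $STATIC_CHAIN_REGNUM, cxt    from 0x42... | (cxt << 7)
       bra  fnaddr                       from 0x30... | (fnaddr << 5)

   while the TARGET_LARGE_MEM variant builds both 32-bit values with
   ilhu/iohl pairs (the 0x41 and 0x60 0x80 opcode bytes) and appends
   0x35000000 + (79 << 7), i.e. a "bi $79", to jump through register
   79.  This reading is our annotation, not a claim from the original
   sources.  */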
void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;

  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
	arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
	arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
	{
	case HImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_extendhisi2 (sign, ops[1]));
	  arr[last] = 0x03;
	  arr[last - 1] = 0x02;
	  break;
	case SImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
	  for (i = 0; i < 4; i++)
	    arr[last - i] = 3 - i;
	  break;
	case DImode:
	  sign = gen_reg_rtx (SImode);
	  c = gen_reg_rtx (SImode);
	  emit_insn (gen_spu_convert (c, ops[1]));
	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
	  for (i = 0; i < 8; i++)
	    arr[last - i] = 7 - i;
	  break;
	default:
	  abort ();
	}
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
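
/* Worked example (editor's annotation): sign-extending HImode to
   DImode uses last = 7, so the shuffle pattern becomes
   { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x02, 0x03, ... }: bytes 6-7
   take the original 16-bit value from its preferred slot in ops[1],
   and the leading bytes replicate byte 0 of SIGN (selector values
   0x10 and up read from the second shufb source), which holds the
   sign-extended word.  */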
/* Expand vector initialization.  If there are any constant parts,
   load the constant parts first.  Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var;
      else if (first_constant == NULL_RTX)
	first_constant = x;
      if (i > 0 && !rtx_equal_p (x, first))
	all_same = false;
    }

  /* If all elements are the same, use splats to repeat elements.  */
  if (all_same)
    {
      if (!CONSTANT_P (first)
	  && !register_operand (first, GET_MODE (x)))
	first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* Load the constant parts.  */
  if (n_var != n_elts)
    {
      if (n_var == 0)
	{
	  emit_move_insn (target,
			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
	}
      else
	{
	  rtx constant_parts_rtx = copy_rtx (vals);

	  gcc_assert (first_constant != NULL_RTX);
	  /* Fill empty slots with the first constant; this increases
	     our chance of using splats in the recursive call below.  */
	  for (i = 0; i < n_elts; ++i)
	    if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
	      XVECEXP (constant_parts_rtx, 0, i) = first_constant;

	  spu_expand_vector_init (target, constant_parts_rtx);
	}
    }

  /* Load the variable parts.  */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
	{
	  x = XVECEXP (vals, 0, i);
	  if (!CONSTANT_P (x))
	    {
	      if (!register_operand (x, GET_MODE (x)))
		x = force_reg (GET_MODE (x), x);
	      insert_operands[1] = x;
	      insert_operands[3] = GEN_INT (i);
	      spu_builtin_insert (insert_operands);
	    }
	}
    }
}
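
/* Usage sketch (editor's annotation): initializing { x, 1, 2, 3 }
   where x lives in a register first recurses on the constant vector
   { 1, 1, 2, 3 } (the variable slot padded with the first constant),
   which is loaded with a single constant-vector move, and then
   inserts x into element 0 via spu_builtin_insert.  */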
/* Return the insn index of the vector compare instruction for the
   given CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is
   available.  */
static int
get_vec_cmp_insn (enum rtx_code code,
		  enum machine_mode dest_mode,
		  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}
/* Emit a vector compare of operands OP0 and OP1 using code RCODE.
   DMODE is the expected destination mode.  This is a recursive
   function.  */
static rtx
spu_emit_vector_compare (enum rtx_code rcode,
			 rtx op0, rtx op1,
			 enum machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  enum machine_mode dest_mode;
  enum machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Single-precision floating-point vector compare instructions use a
     V4SImode destination; double-precision ones use V2DImode.  The
     result is moved to the requested mode afterwards.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
	{
	case LT:
	  rcode = GT;
	  swap_operands = true;
	  try_again = true;
	  break;
	case LTU:
	  rcode = GTU;
	  swap_operands = true;
	  try_again = true;
	  break;
	case NE:
	  /* Treat A != B as ~(A==B).  */
	  {
	    enum insn_code nor_code;
	    rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
	    nor_code = optab_handler (one_cmpl_optab, (int) dest_mode)->insn_code;
	    gcc_assert (nor_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		mask = temp;
	      }
	    return mask;
	  }
	case GE:
	case GEU:
	case LE:
	case LEU:
	  /* Try GT/GTU/LT/LTU OR EQ.  */
	  {
	    rtx c_rtx, eq_rtx;
	    enum insn_code ior_code;
	    enum rtx_code new_code;

	    switch (rcode)
	      {
	      case GE:  new_code = GT;  break;
	      case GEU: new_code = GTU; break;
	      case LE:  new_code = LT;  break;
	      case LEU: new_code = LTU; break;
	      default:
		gcc_unreachable ();
	      }

	    c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
	    eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

	    ior_code = optab_handler (ior_optab, (int) dest_mode)->insn_code;
	    gcc_assert (ior_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		mask = temp;
	      }
	    return mask;
	  }
	default:
	  gcc_unreachable ();
	}

      /* You only get two chances.  */
      if (try_again)
	vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	}
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
      mask = temp;
    }
  return mask;
}
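
/* Worked example (editor's annotation): a V4SImode "a <= b" has no
   direct SPU instruction, so the code above composes
   mask = (a < b) | (a == b), and "a < b" is itself rewritten on the
   retry path as "b > a" (cgt with swapped operands).  */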
/* Emit a vector conditional expression.
   DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR
   operands.  CC_OP0 and CC_OP1 are the two operands of the relational
   operation COND.  */
static int
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
			   rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operation.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return 1;
}
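
/* Editor's note: selb picks bits from its second value operand where
   the mask bits are 1, so passing (op2, op1, mask) yields
   dest = cond ? op1 : op2, matching VEC_COND_EXPR semantics (the
   compare mask is all-ones in lanes where the condition holds).  */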
rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
	  || GET_MODE (op) == BLKmode)
	return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
	return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands.  */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
	error ("%s expects an integer literal in the range [%d, %d].",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
	  && (GET_CODE (XEXP (op, 0)) == PLUS
	      || GET_CODE (XEXP (op, 0)) == MINUS))
	{
	  v = INTVAL (XEXP (XEXP (op, 0), 1));
	  op = XEXP (XEXP (op, 0), 0);
	}
      else if (GET_CODE (op) == CONST_INT)
	v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
	v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0, which is valid in every range.  */
      if (v < spu_builtin_range[range].low
	  || v > spu_builtin_range[range].high)
	error ("%s expects an integer literal in the range [%d, %d]. ("
	       HOST_WIDE_INT_PRINT_DEC ")",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high,
	       v);

      switch (p)
	{
	case SPU_BTI_S10_4:
	  lsbits = 4;
	  break;
	case SPU_BTI_U16_2:
	  /* This is only used in lqa and stqa.  Even though the insns
	     encode 16 bits of the address (all but the 2 least
	     significant), only 14 bits are used because the address is
	     masked to be 16-byte aligned.  */
	  lsbits = 4;
	  break;
	case SPU_BTI_S16_2:
	  /* This is used for lqr and stqr.  */
	  lsbits = 2;
	  break;
	default:
	  lsbits = 0;
	}

      if (GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == SYMBOL_REF
	      && SYMBOL_REF_FUNCTION_P (op))
	  || (v & ((1 << lsbits) - 1)) != 0)
	warning (0, "%d least significant bits of %s are ignored.", lsbits,
		 d->name);
    }
}
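
/* Example diagnostics (editor's annotation): a builtin with an
   SPU_BTI_S10 operand accepts literals in [-512, 511], so passing 512
   triggers the range error above; passing a literal with nonzero low
   bits to an SPU_BTI_S16_2 operand (lqr/stqr) triggers the "least
   significant bits ... are ignored" warning.  */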
static int
expand_builtin_args (struct spu_builtin_description *d, tree exp,
		     rtx target, rtx ops[])
{
  enum insn_code icode = d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl.  */
  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; i < insn_data[icode].n_operands; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
    }

  return i;
}
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
		      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  tree return_type;

  /* Set up ops[] with values from the argument list.  */
  expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand, which must be operand 0.  */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand; otherwise
	 use the mode from the builtin function prototype.  */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target, because not using it can lead to extra
	 copies, and when we are using all of the registers extra
	 copies lead to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
	ops[0] = target;
      else
	target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
	abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
			      gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects.  */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands.  */
  for (p = 1; i < insn_data[icode].n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
	mode = insn_data[d->icode].operand[i].mode;
      else
	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels.  */

      /* For specific intrinsics with an immediate operand, e.g.,
	 si_ai(), we sometimes need to convert the scalar argument to a
	 vector argument by splatting the scalar.  */
      if (VECTOR_MODE_P (mode)
	  && (GET_CODE (ops[i]) == CONST_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
	{
	  if (GET_CODE (ops[i]) == CONST_INT)
	    ops[i] = spu_const (mode, INTVAL (ops[i]));
	  else
	    {
	      rtx reg = gen_reg_rtx (mode);
	      enum machine_mode imode = GET_MODE_INNER (mode);
	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
	      if (imode != GET_MODE (ops[i]))
		ops[i] = convert_to_mode (imode, ops[i],
					  TYPE_UNSIGNED (spu_builtin_types
							 [d->parm[i]]));
	      emit_insn (gen_spu_splats (reg, ops[i]));
	      ops[i] = reg;
	    }
	}

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
	ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (insn_data[icode].n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
	 the builtin function prototype.  */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
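
/* Editor's annotation: per the splatting comment above, a call such
   as si_ai (v, 10) reaches that code with ops[i] == const_int 10 and
   a vector operand mode, so the immediate is widened to a V4SImode
   constant via spu_const before the range check and the predicate
   test run.  */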
rtx
spu_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}
/* Implement targetm.vectorize.builtin_mul_widen_even.  */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
	return spu_builtins[SPU_MULE_0].fndecl;
      else
	return spu_builtins[SPU_MULE_1].fndecl;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
	return spu_builtins[SPU_MULO_1].fndecl;
      else
	return spu_builtins[SPU_MULO_0].fndecl;
    default:
      return NULL_TREE;
    }
}
/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
  gcc_assert (d);
  return d->fndecl;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken, i.e. the vectorized
     version is skipped, this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we
     subtract the latency of a mispredicted branch from the costs that
     are incurred when the vectorized version is executed.  */
  if (runtime_test)
    return -19;
  else
    return 0;
}
/* Return true iff a data reference of type TYPE can reach vector
   alignment (16) after applying N iterations.  This routine does not
   determine how many iterations are required to reach the desired
   alignment.  */
static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}
/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
   -2 means the instruction can go in either pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
	fprintf (dump_file, "i%d %s %d %d\n",
		 INSN_UID (insn),
		 insn_data[INSN_CODE (insn)].name,
		 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}
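
/* Worked example (editor's annotation): with t = { 2, 0, 3, 1 },
   i.e. 2 either-pipe, 3 pipe0 and 1 pipe1 instructions, the result
   is MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3, so the
   modulo scheduler starts with an MII of 3 cycles.  */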
void
spu_init_expanders (void)
{
  /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
     frame_pointer_needed is true.  We don't know that until we're
     expanding the prologue.  */
  if (cfun)
    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
}
static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* On SPU, word_mode is TImode, so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* On SPU, word_mode is TImode, so it is better to use SImode
     for shift counts.  */
  return SImode;
}