/* Copyright (C) 2006 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with this file; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "output.h"
#include "basic-block.h"
#include "integrate.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "reload.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "params.h"
#include "machmode.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#include "spu-builtins.h"
/* Builtin types, data and prototypes. */
struct spu_builtin_range
{
  HOST_WIDE_INT low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
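
/* For example, SPU_BTI_U7 accepts 0 through 127.  Note that SPU_BTI_16
   deliberately spans both the signed and the unsigned 16-bit ranges, so
   a 16-bit immediate field can hold either interpretation of the
   constant.  */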
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs. */
static void spu_init_builtins (void);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance);
static rtx get_branch_target (rtx branch);
static void insert_branch_hints (void);
static void insert_nops (void);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int can_issue_more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_priority (rtx insn, int pri);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
					    tree type, unsigned char named);
static tree spu_build_builtin_va_list (void);
static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
				      tree * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int reg_align (rtx reg);
static int mem_is_padded_component_ref (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
				    int *total);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (tree type, tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);

extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
enum spu_immediate
{
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};

enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
};

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

/* Built in types. */
tree spu_builtin_types[SPU_BTI_MAX];
/* TARGET overrides. */

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode

/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

const struct attribute_spec spu_attribute_table[];
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

struct gcc_target targetm = TARGET_INITIALIZER;
/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed. */
void
spu_override_options (void)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params. */
  if (MAX_UNROLLED_INSNS == 100)
    MAX_UNROLLED_INSNS = 250;
  if (MAX_PENDING_LIST_LENGTH == 32)
    MAX_PENDING_LIST_LENGTH = 128;

  flag_omit_frame_pointer = 1;

  if (align_functions < 8)
    align_functions = 8;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */

/* Table of machine attributes. */
const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",      0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector", 0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,         0, 0, false, false, false, NULL }
};
/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported. */
static unsigned char
spu_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case SFmode:
    case DImode:
    case DFmode:
    case TImode:
      return true;

    default:
      return false;
    }
}
/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details. */
static unsigned char
spu_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return true;

    default:
      return false;
    }
}
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREG's where this is correct. */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
}
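
/* For example, (subreg:SI (reg:QI) 0) is accepted because both modes fit
   in the 4-byte preferred slot, while (subreg:SI (reg:DI) 0) is rejected:
   the SImode view would not occupy the least significant bytes of the
   DImode value.  */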
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset. */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;
  /* Strip any SUBREG */
  if (GET_CODE (op) == SUBREG)
    {
      if (start)
	*start +=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, assure a SUBREG */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT src_size, dst_size;
  enum machine_mode src_mode, dst_mode;
  rtx dst = ops[0], src = ops[1];
  rtx s;

  dst = adjust_operand (ops[0], 0);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  src = adjust_operand (src, &start);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  if (start > 0)
    {
      /* Shift the field up to the most significant bits.  */
      s = gen_reg_rtx (src_mode);
      switch (src_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
	  break;
	default:
	  abort ();
	}
      src = s;
    }

  if (width < src_size)
    {
      rtx pat;
      int icode;
      switch (src_mode)
	{
	case SImode:
	  icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
	  break;
	case DImode:
	  icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
	  break;
	case TImode:
	  icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
	  break;
	default:
	  abort ();
	}
      /* Shift back down, zero or sign extending as requested.  */
      s = gen_reg_rtx (src_mode);
      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
      emit_insn (pat);
      src = s;
    }

  convert_move (dst, src, unsignedp);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx aligned = gen_reg_rtx (SImode);
      rtx low = gen_reg_rtx (SImode);
      rtx addr = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx mem;

      emit_move_insn (addr, XEXP (ops[0], 0));
      emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, aligned);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      emit_move_insn (mem, dst);
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = adjust_address (mem, TImode, 16);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
    }
  else
    emit_insn (gen_selb (dst, dst, shift_reg, mask));
}
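
/* As a worked example of the 128-bit mask construction above: for
   start = 4 and width = 8 the field occupies bits 4..11, so arr[0]
   becomes 0x0f (0xff >> 4), arr[1] is first set to 0xff and then
   trimmed to 0xf0 by the final "&=", and all other bytes stay zero.  */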
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (bytes - offset)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
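
/* For example, a 20-byte copy with 16-byte alignment becomes one
   V16QImode move for bytes 0..15 followed by a selb whose mask is
   0xff in its first four bytes only, merging bytes 16..19 from the
   source into the existing destination quadword.  */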
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[8][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_vec, 0, 0},
};
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, so we can generate better code in most cases if we do it
   ourselves. */
void
spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result;
  rtx comp_rtx;
  rtx target = operands[0];
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode;
  int index;

  /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
  if (GET_CODE (spu_compare_op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
	switch (code)
	  {
	  case GE:
	    spu_compare_op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    spu_compare_op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    spu_compare_op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    spu_compare_op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }

  /* Map the rtl comparison onto the SPU compares (eq, gt, gtu),
     possibly swapping the operands and/or negating the result.  */
  switch (code)
    {
    case GE:
      reverse_compare = 1;
      reverse_test = 1;
      scode = SPU_GT;
      break;
    case LE:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_GT;
      break;
    case LT:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GT;
      break;
    case GEU:
      reverse_compare = 1;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LEU:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LTU:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GTU;
      break;
    case NE:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_EQ;
      break;

    case EQ:
      scode = SPU_EQ;
      break;
    case GT:
      scode = SPU_GT;
      break;
    case GTU:
      scode = SPU_GTU;
      break;
    default:
      scode = SPU_EQ;
      break;
    }

  comp_mode = SImode;
  op_mode = GET_MODE (spu_compare_op0);

  switch (op_mode)
    {
    case QImode:
      index = 0;
      comp_mode = QImode;
      break;
    case HImode:
      index = 1;
      comp_mode = HImode;
      break;
    case SImode:
      index = 2;
      break;
    case DImode:
      index = 3;
      break;
    case TImode:
      index = 4;
      break;
    case SFmode:
      index = 5;
      break;
    case DFmode:
      index = 6;
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      index = 7;
      break;
    default:
      abort ();
    }

  if (GET_MODE (spu_compare_op1) == DFmode)
    {
      rtx reg = gen_reg_rtx (DFmode);
      if (!flag_unsafe_math_optimizations
	  || (scode != SPU_GT && scode != SPU_EQ))
	abort ();
      if (reverse_compare)
	emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
      else
	emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
      reverse_compare = 0;
      spu_compare_op0 = reg;
      spu_compare_op1 = CONST0_RTX (DFmode);
    }

  if (is_set == 0 && spu_compare_op1 == const0_rtx
      && (GET_MODE (spu_compare_op0) == SImode
	  || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = spu_compare_op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = spu_compare_op1;
	  spu_compare_op1 = spu_compare_op0;
	  spu_compare_op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (spu_compare_op0, op_mode))
	spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (spu_compare_op1, op_mode))
	spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 spu_compare_op0,
							 spu_compare_op1);
      if (comp_rtx == 0)
	abort ();
      emit_insn (comp_rtx);
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}
rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
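
/* For example, hwint_to_const_double (SFmode, 0x3f800000) reinterprets
   the IEEE-754 single-precision bit pattern 0x3f800000 and yields the
   CONST_DOUBLE for 1.0f.  */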
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);
  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      else
	abort ();
      break;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_INT:
      output_addr_const (file, addr);
      break;

    default:
      abort ();
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {

    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	      break;
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	    case SPU_IOHL:
	      break;
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	gcc_unreachable ();
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		  break;
		case SPU_ILA:
		  fprintf (file, "a");
		  break;
		case SPU_ILH:
		  fprintf (file, "h");
		  break;
		case SPU_ILHU:
		  fprintf (file, "hu");
		  break;
		default:
		  gcc_unreachable ();
		}
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	      if (info == 1)
		fprintf (file, "b");
	      else if (info == 2)
		fprintf (file, "h");
	      else if (info == 4)
		fprintf (file, "w");
	      else if (info == 8)
		fprintf (file, "d");
	      break;
	    case IC_IL1s:
	      if (xcode == CONST_VECTOR)
		{
		  x = CONST_VECTOR_ELT (x, 0);
		  xcode = GET_CODE (x);
		}
	      if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
		fprintf (file, "a");
	      else if (xcode == HIGH)
		fprintf (file, "hu");
	      break;
	    case IC_FSMBI:
	      fprintf (file, "fsmbi");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		case SPU_ILA:
		  break;
		case SPU_ILH:
		case SPU_ILHU:
		  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	      break;
	    case IC_FSMBI:
	      constant_to_array (mode, x, arr);
	      val = 0;
	      for (i = 0; i < 16; i++)
		{
		  val <<= 1;
		  val |= arr[i] & 1;
		}
	      print_operand (file, GEN_INT (val), 0);
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
	      break;
	    case IC_IL1s:
	      if (xcode == CONST_VECTOR)
		{
		  x = CONST_VECTOR_ELT (x, 0);
		  xcode = GET_CODE (x);
		}
	      if (xcode == HIGH)
		{
		  output_addr_const (file, XEXP (x, 0));
		  fprintf (file, "@h");
		}
	      else
		output_addr_const (file, x);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'C':			/* offset for the c*d instructions */
      if (xcode == CONST_INT)
	{
	  /* Only 4 least significant bits are relevant for generate
	     control word instructions. */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	  return;
	}
      break;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  default:
	    gcc_unreachable ();
	  }
      else
	gcc_unreachable ();
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls. */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

    case 0:
      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}
extern char call_used_regs[];
extern char regs_ever_live[];

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register. */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  if (current_function_is_leaf && !regs_ever_live[LAST_ARG_REGNUM])
    pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
  return pic_reg;
}
/* Split constant addresses to handle cases that are too large.  Also, add in
   the pic register when in PIC mode. */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, hi, lo;
	int i;
	constant_to_array (mode, ops[1], arrhi);
	to = no_new_pseudos ? ops[0] : gen_reg_rtx (mode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (mode, arrhi);
	lo = array_to_constant (mode, arrlo);
	emit_move_insn (to, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_insn (gen_high (ops[0], ops[1]));
	      emit_insn (gen_low (ops[0], ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	      current_function_uses_pic_offset_table = 1;
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving register we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
static int
need_to_save_reg (int regno, int saving)
{
  if (regs_ever_live[regno] && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || current_function_uses_pic_offset_table)
      && (!saving
	  || !current_function_is_leaf || regs_ever_live[LAST_ARG_REGNUM]))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
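
/* Each saved register takes 0x10 (16) bytes because SPU registers are
   128 bits wide and are saved and restored with quadword accesses.  */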
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}
/* This happens after reload, so we need to expand it. */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      insn = emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					    REG_NOTES (insn));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  if (REGNO (dst) == REGNO (scratch))
    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					  REG_NOTES (insn));
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue. */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + current_function_outgoing_args_size
	      + current_function_pretend_args_size == 0)
	  && current_function_is_leaf)
	return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:
         +-------------+
         |  incoming   |
      AP |    args     |
         +-------------+
         | $lr save    |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  reg save   | current_function_pretend_args_size bytes
         +-------------+
         | saved regs  | spu_saved_regs_size() bytes
         +-------------+
      FP | vars        | get_frame_size() bytes
         +-------------+
         |  outgoing   |
         |    args     | current_function_outgoing_args_size bytes
         +-------------+
         | $lr save    |
         +-------------+
      SP | back chain  |
         +-------------+
*/
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (flag_pic && optimize == 0)
    current_function_uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + current_function_outgoing_args_size
    + current_function_pretend_args_size;

  if (!current_function_is_leaf
      || current_function_calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (total_size > 0)
    {
      offset = -current_function_pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 16;
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (flag_pic && current_function_uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					    REG_NOTES (insn));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					    REG_NOTES (insn));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1) */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
	}

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain. */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first. */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else if (satisfies_constraint_K (GEN_INT (-total_size)))
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn =
	    emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      REG_NOTES (insn) =
	gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + current_function_outgoing_args_size;
	  /* Set the new frame_pointer */
	  frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	}
    }

  emit_note (NOTE_INSN_DELETED);
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + current_function_outgoing_args_size
    + current_function_pretend_args_size;

  if (!current_function_is_leaf
      || current_function_calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (current_function_calls_alloca)
	/* Load it from the back chain because our save_stack_block and
	   restore_stack_block do nothing. */
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
    }

  if (saved_regs_size > 0)
    {
      offset = -current_function_pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 0x10;
	    frame_emit_load (regno, sp_reg, offset);
	  }
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_insn (gen_rtx_USE
		 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);
    }

  emit_note (NOTE_INSN_DELETED);
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
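
/* For example, spu_const (V4SImode, 1) yields the rtx
   (const_vector:V4SI [1 1 1 1]).  */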
/* branch hint stuff */

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint.  (FIXME: We should
   accept less and insert nops to enforce it because hinting is always
   profitable for performance, but we do need to be careful of code
   size.) */
int spu_hint_dist = (8 * 4);

/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  basic_block bb;		/* the original block. */
};
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it. */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
{
  rtx branch_label;
  rtx hint, insn, prev, next;

  if (before == 0 || branch == 0 || target == 0)
    return;

  if (distance > 600)
    return;

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);

  /* If the previous insn is pipe0, make the hbr dual issue with it.  If
     the current insn is pipe0, dual issue with it. */
  prev = prev_active_insn (before);
  if (prev && get_pipe (prev) == 0)
    hint = emit_insn_before (gen_hbr (branch_label, target), before);
  else if (get_pipe (before) == 0 && distance > spu_hint_dist)
    {
      next = next_active_insn (before);
      hint = emit_insn_after (gen_hbr (branch_label, target), before);
      if (next)
	PUT_MODE (next, TImode);
    }
  else
    {
      hint = emit_insn_before (gen_hbr (branch_label, target), before);
      PUT_MODE (hint, TImode);
    }
  recog_memoized (hint);
}
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target. */
static rtx
get_branch_target (rtx branch)
{
  if (GET_CODE (branch) == JUMP_INSN)
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* jump table */
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
	return 0;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
	         try a branch hint. */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (GET_CODE (branch) == CALL_INSN)
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL. */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
static void
insert_branch_hints (void)
{
  struct spu_bb_info *spu_bb_info;
  rtx branch, insn, next;
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, head_addr;
  basic_block bb;
  unsigned int j;

  spu_bb_info =
    (struct spu_bb_info *) xcalloc (last_basic_block + 1,
				    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths. */
  shorten_branches (get_insns ());

  FOR_EACH_BB_REVERSE (bb)
  {
    head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
    branch = 0;
    if (spu_bb_info[bb->index].prop_jump)
      {
	branch = spu_bb_info[bb->index].prop_jump;
	branch_target = get_branch_target (branch);
	branch_addr = INSN_ADDRESSES (INSN_UID (branch));
      }
    /* Search from end of a block to beginning.  In this loop, find
       jumps which need a branch hint and emit them only when:
       - it's an indirect branch and we're at the insn which sets
       the register
       - we're at an insn that will invalidate the hint. e.g., a
       call, another hint insn, inline asm that clobbers $hbr, and
       some inlined operations (divmodsi4).  Don't consider jumps
       because they are only at the end of a block and are
       considered when we are deciding whether to propagate
       - we're getting too far away from the branch.  The hbr insns
       only have a signed 10 bit offset
       We go back as far as possible so the branch will be considered
       for propagation when we get to the beginning of the block. */
    next = 0;
    for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
      {
	if (INSN_P (insn))
	  {
	    insn_addr = INSN_ADDRESSES (INSN_UID (insn));
	    if (branch
		&& ((GET_CODE (branch_target) == REG
		     && set_of (branch_target, insn) != NULL_RTX)
		    || insn_clobbers_hbr (insn)
		    || branch_addr - insn_addr > 600))
	      {
		int next_addr = INSN_ADDRESSES (INSN_UID (next));
		if (insn != BB_END (bb)
		    && branch_addr - next_addr >= spu_hint_dist)
		  {
		    if (dump_file)
		      fprintf (dump_file,
			       "hint for %i in block %i before %i\n",
			       INSN_UID (branch), bb->index, INSN_UID (next));
		    spu_emit_branch_hint (next, branch, branch_target,
					  branch_addr - next_addr);
		  }
		branch = 0;
	      }

	    /* JUMP_P will only be true at the end of a block.  When
	       branch is already set it means we've previously decided
	       to propagate a hint for that branch into this block. */
	    if (CALL_P (insn) || (JUMP_P (insn) && !branch))
	      {
		branch = 0;
		if ((branch_target = get_branch_target (insn)))
		  {
		    branch = insn;
		    branch_addr = insn_addr;
		  }
	      }

	    /* When a branch hint is emitted it will be inserted
	       before "next".  Make sure next is the beginning of a
	       cycle to minimize impact on the scheduled insns. */
	    if (GET_MODE (insn) == TImode)
	      next = insn;
	  }
	if (insn == BB_HEAD (bb))
	  break;
      }

    if (branch)
      {
	/* If we haven't emitted a hint for this branch yet, it might
	   be profitable to emit it in one of the predecessor blocks,
	   especially for loops. */
	rtx bbend;
	basic_block prev = 0, prop = 0, prev2 = 0;
	int loop_exit = 0, simple_loop = 0;
	int next_addr = 0;
	if (next)
	  next_addr = INSN_ADDRESSES (INSN_UID (next));

	for (j = 0; j < EDGE_COUNT (bb->preds); j++)
	  if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
	    prev = EDGE_PRED (bb, j)->src;
	  else
	    prev2 = EDGE_PRED (bb, j)->src;

	for (j = 0; j < EDGE_COUNT (bb->succs); j++)
	  if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
	    loop_exit = 1;
	  else if (EDGE_SUCC (bb, j)->dest == bb)
	    simple_loop = 1;

	/* If this branch is a loop exit then propagate to previous
	   fallthru block.  This catches the cases when it is a simple
	   loop or when there is an initial branch into the loop. */
	if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
	  prop = prev;

	/* If there is only one adjacent predecessor.  Don't propagate
	   outside this loop.  This loop_depth test isn't perfect, but
	   I'm not sure the loop_father member is valid at this point. */
	else if (prev && single_pred_p (bb)
		 && prev->loop_depth == bb->loop_depth)
	  prop = prev;

	/* If this is the JOIN block of a simple IF-THEN then
	   propagate the hint to the HEADER block. */
	else if (prev && prev2
		 && EDGE_COUNT (bb->preds) == 2
		 && EDGE_COUNT (prev->preds) == 1
		 && EDGE_PRED (prev, 0)->src == prev2
		 && prev2->loop_depth == bb->loop_depth
		 && GET_CODE (branch_target) != REG)
	  prop = prev;

	/* Don't propagate when:
	   - this is a simple loop and the hint would be too far
	   - this is not a simple loop and there are 16 insns in
	   this block already
	   - the predecessor block ends in a branch that will be
	   hinted
	   - the predecessor block ends in an insn that invalidates
	   the hint */
	if (prop
	    && prop->index >= 0
	    && (bbend = BB_END (prop))
	    && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
	    (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
	    && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
		       "for %i (loop_exit %i simple_loop %i dist %i)\n",
		       bb->index, prop->index, bb->loop_depth,
		       INSN_UID (branch), loop_exit, simple_loop,
		       branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

	    spu_bb_info[prop->index].prop_jump = branch;
	    spu_bb_info[prop->index].bb = bb;
	  }
	else if (next && branch_addr - next_addr >= spu_hint_dist)
	  {
	    if (dump_file)
	      fprintf (dump_file, "hint for %i in block %i before %i\n",
		       INSN_UID (branch), bb->index, INSN_UID (next));
	    spu_emit_branch_hint (next, branch, branch_target,
				  branch_addr - next_addr);
	  }
	branch = 0;
      }
  }
  free (spu_bb_info);
}
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
   unrecognized insns. */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements. */
static void
insert_nops (void)
{
  rtx insn, next_insn, prev_insn;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES. */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops. */
  length = 0;

  prev_insn = 0;
  for (insn = get_insns (); insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if (GET_MODE (insn) == TImode
	  && next_insn
	  && GET_MODE (next_insn) != TImode
	  && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned. */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}
static void
spu_machine_dependent_reorg (void)
{
  if (optimize > 0)
    {
      if (TARGET_BRANCH_HINTS)
	insert_branch_hints ();
      insert_nops ();
    }
}
/* Insn scheduling routines, primarily for dual issue. */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
			  int verbose ATTRIBUTE_UNUSED, rtx insn,
			  int can_issue_more)
{
  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER
      && get_pipe (insn) != -2)
    can_issue_more--;
  return can_issue_more;
}

static int
get_pipe (rtx insn)
{
  enum attr_type t;
  /* Handle inline asm */
  if (INSN_CODE (insn) == -1)
    return -1;
  t = get_attr_type (insn);
  switch (t)
    {
    case TYPE_CONVERT:
      return -2;
    case TYPE_MULTI0:
      return -1;

    case TYPE_FX2:
    case TYPE_FX3:
    case TYPE_SPR:
    case TYPE_NOP:
    case TYPE_FXB:
    case TYPE_FPD:
    case TYPE_FP6:
    case TYPE_FP7:
    case TYPE_IPREFETCH:
      return 0;

    case TYPE_LNOP:
    case TYPE_SHUF:
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_BR:
    case TYPE_MULTI1:
    case TYPE_HBR:
      return 1;
    default:
      abort ();
    }
}
static int
spu_sched_adjust_priority (rtx insn, int pri)
{
  int p = get_pipe (insn);
  /* Schedule UNSPEC_CONVERT's early so they have less effect on
     scheduling. */
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER
      || p == -2)
    return pri + 100;
  /* Schedule pipe0 insns early for greedier dual issue. */
  if (p == 0)
    return pri + 50;
  return pri;
}
/* INSN is dependent on DEP_INSN. */
static int
spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
		       rtx dep_insn ATTRIBUTE_UNUSED, int cost)
{
  if (GET_CODE (insn) == CALL_INSN)
    return cost - 2;
  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier. */
  if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return INSN_COST (dep_insn) - 3;
  return cost;
}
/* Create a CONST_DOUBLE from a string. */
rtx
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}
/* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
   CONST_INT fits constraint 'K', i.e., is small. */
static int
legitimate_const (rtx x, int aligned)
{
  rtx sym, cst;

  /* We can never know if the resulting address fits in 18 bits and can be
     loaded with ila.  Instead we should use the HI and LO relocations to
     load a 32 bit address. */
  if (TARGET_LARGE_MEM)
    return 0;

  gcc_assert (GET_CODE (x) == CONST);

  if (GET_CODE (XEXP (x, 0)) != PLUS)
    return 0;
  sym = XEXP (XEXP (x, 0), 0);
  cst = XEXP (XEXP (x, 0), 1);
  if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
    return 0;
  if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
    return 0;
  return satisfies_constraint_K (cst);
}
int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
	  || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;
  return SPU_NONE;
}
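
/* For example, 0x1234 fits the signed 16-bit il field (SPU_IL), 0x23456
   only fits the 18-bit ila field (SPU_ILA), 0x12341234 has identical
   halfwords and suits ilh (SPU_ILH), and 0x12340000 can be built with
   ilhu alone (SPU_ILHU).  */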
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl. */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || (!flow2_completed && c == IC_IL2);
    }
  return 0;
}
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use. */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
	if (!run)
	  {
	    start = i;
	    if (arr[i] == 3)
	      run = 1;
	    else if (arr[i] == 2 && arr[i + 1] == 3)
	      run = 2;
	    else if (arr[i] == 0)
	      {
		while (arr[i + run] == run && i + run < 16)
		  run++;
		if (run != 4 && run != 8)
		  cpat = 0;
	      }
	    else
	      cpat = 0;
	    if ((i & (run - 1)) != 0)
	      cpat = 0;
	    i += run;
	  }
	else
	  cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
	run = 1;
      if (prun)
	*prun = run;
      if (pstart)
	*pstart = start == -1 ? 16 - run : start;
      return 1;
    }
  return 0;
}
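
/* For example, the 16-byte pattern 10 11 12 13 14 15 16 17 00 01 02 03
   1c 1d 1e 1f (hex) is the shuffle control that cwd generates for
   offset 8, so cpat_info reports run 4 and start 8 for it.  */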
/* OP is a CONSTANT_P.  Determine what instructions can be used to load
   it into a register.  MODE is only valid when OP is a CONST_INT. */
static enum immediate_class
classify_immediate (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j, repeated, fsmbi;

  gcc_assert (CONSTANT_P (op));

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* A V4SI const_vector with all identical symbols is ok. */
  if (mode == V4SImode
      && GET_CODE (op) == CONST_VECTOR
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
    op = CONST_VECTOR_ELT (op, 0);

  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;

    case CONST:
      return TARGET_LARGE_MEM
	|| !legitimate_const (op, 0) ? IC_IL2s : IC_IL1s;

    case HIGH:
      return IC_IL1s;

    case CONST_VECTOR:
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
	if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
	    && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
	  return IC_POOL;
      /* Fall through. */

    case CONST_INT:
    case CONST_DOUBLE:
      constant_to_array (mode, op, arr);

      /* Check that each 4-byte slot is identical. */
      repeated = 1;
      for (i = 4; i < 16; i += 4)
	for (j = 0; j < 4; j++)
	  if (arr[j] != arr[i + j])
	    repeated = 0;

      if (repeated)
	{
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);

	  if (which_immediate_load (val) != SPU_NONE)
	    return IC_IL1;
	}

      /* Any mode of 2 bytes or smaller can be loaded with an il
         instruction. */
      gcc_assert (GET_MODE_SIZE (mode) > 2);

      fsmbi = 1;
      for (i = 0; i < 16 && fsmbi; i++)
	if (arr[i] != 0 && arr[i] != 0xff)
	  fsmbi = 0;
      if (fsmbi)
	return IC_FSMBI;

      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
	return IC_CPAT;

      if (repeated)
	return IC_IL2;

      return IC_POOL;
    default:
      break;
    }
  gcc_unreachable ();
}
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
	return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
	{
	  val = trunc_int_for_mode (val, QImode);
	  if (val >= -0x200 && val <= 0x1ff)
	    return SPU_ORBI;
	}
    }
  return SPU_NONE;
}
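
/* For example, 0x00120012 repeats per halfword and 0x12 fits the 10-bit
   signed field, so it is classified SPU_ORHI (one orhi covers the whole
   register), while 0x1234 does not fit 10 bits but is a non-negative
   16-bit value and so is SPU_IOHL.  */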
int
logical_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}
int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}
int
arith_immediate_p (rtx op, enum machine_mode mode,
		   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated. */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}
2581 - any 32 bit constant (SImode, SFmode)
2582 - any constant that can be generated with fsmbi (any mode)
2583 - a 64 bit constant where the high and low 32 bit words are identical
2585 - a 128 bit constant where the four 32 bit words match. */
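/* Illustrative sketch (not from the original source): the DImode
   constant 0x1234567812345678 is legitimate because its two words
   match and SPU immediate loads replicate a 32-bit value across the
   register, while 0x1234567800000000 would have to be synthesized
   with multiple insns or loaded from memory. */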
2587 spu_legitimate_constant_p (rtx x)
2590 /* V4SI with all identical symbols is valid. */
2591 if (GET_MODE (x) == V4SImode
2592 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2593 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2594 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST
2595 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == HIGH))
2596 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2597 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2598 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2600 if (VECTOR_MODE_P (GET_MODE (x)))
2601 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2602 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2603 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2608 /* Valid addresses are:
2609 - symbol_ref, label_ref, const
2611 - reg + const, where either reg or const is 16 byte aligned
2612 - reg + reg, alignment doesn't matter
2613 The alignment matters in the reg+const case because lqd and stqd
2614 ignore the 4 least significant bits of the const. (TODO: It might be
2615 preferable to allow any alignment and fix it up when splitting.) */
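/* Illustrative sketch (not from the original source): because
   lqd/stqd drop the 4 least significant bits of the constant, a load
   from "reg + 12" with a 16-byte aligned reg would actually access
   "reg + 0".  So reg+const is only accepted when the constant keeps
   the sum 16-byte aligned, or when reg is a frame register whose
   offsets the compiler controls and can fix up later. */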
2617 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2618 rtx x, int reg_ok_strict)
2620 if (mode == TImode && GET_CODE (x) == AND
2621 && GET_CODE (XEXP (x, 1)) == CONST_INT
2622 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2624 switch (GET_CODE (x))
2628 return !TARGET_LARGE_MEM;
2631 return !TARGET_LARGE_MEM && legitimate_const (x, 0);
2634 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2638 gcc_assert (GET_CODE (x) == REG);
2641 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2646 rtx op0 = XEXP (x, 0);
2647 rtx op1 = XEXP (x, 1);
2648 if (GET_CODE (op0) == SUBREG)
2649 op0 = XEXP (op0, 0);
2650 if (GET_CODE (op1) == SUBREG)
2651 op1 = XEXP (op1, 0);
2652 /* We can't just accept any aligned register because CSE can
2653 change it to a register that is not marked aligned and then
2654 recog will fail. So we only accept frame registers because
2655 they will only be changed to other frame registers. */
2656 if (GET_CODE (op0) == REG
2657 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2658 && GET_CODE (op1) == CONST_INT
2659 && INTVAL (op1) >= -0x2000
2660 && INTVAL (op1) <= 0x1fff
2661 && (REGNO_PTR_FRAME_P (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2663 if (GET_CODE (op0) == REG
2664 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2665 && GET_CODE (op1) == REG
2666 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2677 /* When the address is reg + const_int, force the const_int into a
2680 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2681 enum machine_mode mode)
2684 /* Make sure both operands are registers. */
2685 if (GET_CODE (x) == PLUS)
2689 if (ALIGNED_SYMBOL_REF_P (op0))
2691 op0 = force_reg (Pmode, op0);
2692 mark_reg_pointer (op0, 128);
2694 else if (GET_CODE (op0) != REG)
2695 op0 = force_reg (Pmode, op0);
2696 if (ALIGNED_SYMBOL_REF_P (op1))
2698 op1 = force_reg (Pmode, op1);
2699 mark_reg_pointer (op1, 128);
2701 else if (GET_CODE (op1) != REG)
2702 op1 = force_reg (Pmode, op1);
2703 x = gen_rtx_PLUS (Pmode, op0, op1);
2704 if (spu_legitimate_address (mode, x, 0))
2710 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2711 struct attribute_spec.handler. */
2713 spu_handle_fndecl_attribute (tree * node,
2715 tree args ATTRIBUTE_UNUSED,
2716 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2718 if (TREE_CODE (*node) != FUNCTION_DECL)
2720 warning (0, "`%s' attribute only applies to functions",
2721 IDENTIFIER_POINTER (name));
2722 *no_add_attrs = true;
2728 /* Handle the "vector" attribute. */
2730 spu_handle_vector_attribute (tree * node, tree name,
2731 tree args ATTRIBUTE_UNUSED,
2732 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2734 tree type = *node, result = NULL_TREE;
2735 enum machine_mode mode;
2738 while (POINTER_TYPE_P (type)
2739 || TREE_CODE (type) == FUNCTION_TYPE
2740 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2741 type = TREE_TYPE (type);
2743 mode = TYPE_MODE (type);
2745 unsigned_p = TYPE_UNSIGNED (type);
2749 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2752 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2755 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
2758 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
2761 result = V4SF_type_node;
2764 result = V2DF_type_node;
2770 /* Propagate qualifiers attached to the element type
2771 onto the vector type. */
2772 if (result && result != type && TYPE_QUALS (type))
2773 result = build_qualified_type (result, TYPE_QUALS (type));
2775 *no_add_attrs = true; /* No need to hang on to the attribute. */
2778 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
2780 *node = reconstruct_complex_type (*node, result);
2785 /* Return non-zero if FUNC is a naked function. */
2787 spu_naked_function_p (tree func)
2791 if (TREE_CODE (func) != FUNCTION_DECL)
2794 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
2795 return a != NULL_TREE;
2799 spu_initial_elimination_offset (int from, int to)
2801 int saved_regs_size = spu_saved_regs_size ();
2803 if (!current_function_is_leaf || current_function_outgoing_args_size
2804 || get_frame_size () || saved_regs_size)
2805 sp_offset = STACK_POINTER_OFFSET;
2806 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2807 return (sp_offset + current_function_outgoing_args_size);
2808 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2810 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2811 return sp_offset + current_function_outgoing_args_size
2812 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
2813 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2814 return get_frame_size () + saved_regs_size + sp_offset;
2819 spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2821 enum machine_mode mode = TYPE_MODE (type);
2822 int byte_size = ((mode == BLKmode)
2823 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2825 /* Make sure small structs are left justified in a register. */
2826 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2827 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
2829 enum machine_mode smode;
2832 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2833 int n = byte_size / UNITS_PER_WORD;
2834 v = rtvec_alloc (nregs);
2835 for (i = 0; i < n; i++)
2837 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
2838 gen_rtx_REG (TImode,
2841 GEN_INT (UNITS_PER_WORD * i));
2842 byte_size -= UNITS_PER_WORD;
2850 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2852 gen_rtx_EXPR_LIST (VOIDmode,
2853 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
2854 GEN_INT (UNITS_PER_WORD * n));
2856 return gen_rtx_PARALLEL (mode, v);
2858 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
2862 spu_function_arg (CUMULATIVE_ARGS cum,
2863 enum machine_mode mode,
2864 tree type, int named ATTRIBUTE_UNUSED)
2868 if (cum >= MAX_REGISTER_ARGS)
2871 byte_size = ((mode == BLKmode)
2872 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2874 /* The ABI does not allow parameters to be passed partially in a
2875 register and partially on the stack. */
2876 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
2879 /* Make sure small structs are left justified in a register. */
2880 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2881 && byte_size < UNITS_PER_WORD && byte_size > 0)
2883 enum machine_mode smode;
2887 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2888 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2889 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
2891 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
2894 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
2897 /* Variable sized types are passed by reference. */
2899 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
2900 enum machine_mode mode ATTRIBUTE_UNUSED,
2901 tree type, bool named ATTRIBUTE_UNUSED)
2903 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2909 /* Create and return the va_list datatype.
2911 On SPU, va_list is an array type equivalent to
2913 typedef struct __va_list_tag
2915 void *__args __attribute__((__aligned(16)));
2916 void *__skip __attribute__((__aligned(16)));
2920 where __args points to the arg that will be returned by the next
2921 va_arg(), and __skip points to the previous stack frame such that
2922 when __args == __skip we should advance __args by 32 bytes. */
2924 spu_build_builtin_va_list (void)
2926 tree f_args, f_skip, record, type_decl;
2929 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2932 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2934 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
2935 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
2937 DECL_FIELD_CONTEXT (f_args) = record;
2938 DECL_ALIGN (f_args) = 128;
2939 DECL_USER_ALIGN (f_args) = 1;
2941 DECL_FIELD_CONTEXT (f_skip) = record;
2942 DECL_ALIGN (f_skip) = 128;
2943 DECL_USER_ALIGN (f_skip) = 1;
2945 TREE_CHAIN (record) = type_decl;
2946 TYPE_NAME (record) = type_decl;
2947 TYPE_FIELDS (record) = f_args;
2948 TREE_CHAIN (f_args) = f_skip;
2950 /* We know this is being padded and we want it that way. It is an internal
2951 type so hide the warnings from the user. */
2953 warn_padded = false;
2955 layout_type (record);
2959 /* The correct type is an array type of one element. */
2960 return build_array_type (record, build_index_type (size_zero_node));
2963 /* Implement va_start by filling the va_list structure VALIST.
2964 NEXTARG points to the first anonymous stack argument.
2966 The following global variables are used to initialize
2967 the va_list structure:
2969 current_function_args_info:
2970 the CUMULATIVE_ARGS for this function
2972 current_function_arg_offset_rtx:
2973 holds the offset of the first anonymous stack argument
2974 (relative to the virtual arg pointer). */
2977 spu_va_start (tree valist, rtx nextarg)
2979 tree f_args, f_skip;
2982 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2983 f_skip = TREE_CHAIN (f_args);
2985 valist = build_va_arg_indirect_ref (valist);
2987 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2989 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
2991 /* Find the __args area. */
2992 t = make_tree (TREE_TYPE (args), nextarg);
2993 if (current_function_pretend_args_size > 0)
2994 t = build2 (PLUS_EXPR, TREE_TYPE (args), t,
2995 build_int_cst (integer_type_node, -STACK_POINTER_OFFSET));
2996 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
2997 TREE_SIDE_EFFECTS (t) = 1;
2998 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3000 /* Find the __skip area. */
3001 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3002 t = build2 (PLUS_EXPR, TREE_TYPE (skip), t,
3003 build_int_cst (integer_type_node,
3004 (current_function_pretend_args_size
3005 - STACK_POINTER_OFFSET)));
3006 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
3007 TREE_SIDE_EFFECTS (t) = 1;
3008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3011 /* Gimplify va_arg by updating the va_list structure
3012 VALIST as required to retrieve an argument of type
3013 TYPE, and returning that argument.
3015 ret = va_arg(VALIST, TYPE);
3017 generates code equivalent to:
3019 paddedsize = (sizeof(TYPE) + 15) & -16;
3020 if (VALIST.__args + paddedsize > VALIST.__skip
3021 && VALIST.__args <= VALIST.__skip)
3022 addr = VALIST.__skip + 32;
3023 else
3024 addr = VALIST.__args;
3025 VALIST.__args = addr + paddedsize;
3026 ret = *(TYPE *)addr;
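/* A compilable rendering of the sketch above (hypothetical helper,
   not compiler code; field names follow the va_list comment earlier):

     struct spu_va_tag { void *__args; void *__skip; };

     static void *
     spu_next_va_arg (struct spu_va_tag *va, unsigned long size)
     {
       unsigned long padded = (size + 15) & ~15UL;
       char *addr = (char *) va->__args;
       // If the padded arg would straddle the old frame, skip over it.
       if (addr + padded > (char *) va->__skip
           && addr <= (char *) va->__skip)
         addr = (char *) va->__skip + 32;
       va->__args = addr + padded;
       return addr;
     }
*/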
3029 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
3030 tree * post_p ATTRIBUTE_UNUSED)
3032 tree f_args, f_skip;
3034 HOST_WIDE_INT size, rsize;
3035 tree paddedsize, addr, tmp;
3036 bool pass_by_reference_p;
3038 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3039 f_skip = TREE_CHAIN (f_args);
3041 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3043 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3045 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3047 addr = create_tmp_var (ptr_type_node, "va_arg");
3048 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3050 /* if an object is dynamically sized, a pointer to it is passed
3051 instead of the object itself. */
3052 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3054 if (pass_by_reference_p)
3055 type = build_pointer_type (type);
3056 size = int_size_in_bytes (type);
3057 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3059 /* build conditional expression to calculate addr. The expression
3060 will be gimplified later. */
3061 paddedsize = fold_convert (ptr_type_node, size_int (rsize));
3062 tmp = build2 (PLUS_EXPR, ptr_type_node, args, paddedsize);
3063 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3064 build2 (GT_EXPR, boolean_type_node, tmp, skip),
3065 build2 (LE_EXPR, boolean_type_node, args, skip));
3067 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3068 build2 (PLUS_EXPR, ptr_type_node, skip,
3069 fold_convert (ptr_type_node, size_int (32))), args);
3071 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
3072 gimplify_and_add (tmp, pre_p);
3074 /* update VALIST.__args */
3075 tmp = build2 (PLUS_EXPR, ptr_type_node, addr, paddedsize);
3076 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
3077 gimplify_and_add (tmp, pre_p);
3079 addr = fold_convert (build_pointer_type (type), addr);
3081 if (pass_by_reference_p)
3082 addr = build_va_arg_indirect_ref (addr);
3084 return build_va_arg_indirect_ref (addr);
3087 /* Save parameter registers starting with the register that corresponds
3088 to the first unnamed parameter. If the first unnamed parameter is
3089 on the stack then save no registers. Set pretend_args_size to the
3090 amount of space needed to save the registers. */
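/* For example (sketch, not from the original source): for
   "int f (int a, ...)", cum names the register holding 'a' on entry;
   after FUNCTION_ARG_ADVANCE, ncum names the first unnamed argument's
   register, and the loop below stores every register from ncum up to
   MAX_REGISTER_ARGS just below the incoming arguments, so va_arg can
   walk named and unnamed arguments with the same pointer
   arithmetic. */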
3092 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3093 tree type, int *pretend_size, int no_rtl)
3102 /* cum currently points to the last named argument, we want to
3103 start at the next argument. */
3104 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3106 offset = -STACK_POINTER_OFFSET;
3107 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3109 tmp = gen_frame_mem (V4SImode,
3110 plus_constant (virtual_incoming_args_rtx,
3112 emit_move_insn (tmp,
3113 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3116 *pretend_size = offset + STACK_POINTER_OFFSET;
3121 spu_conditional_register_usage (void)
3125 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3126 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3128 global_regs[INTR_REGNUM] = 1;
3131 /* This is called to decide when we can simplify a load instruction. We
3132 must only return true for registers which we know will always be
3133 aligned. We must take into account that CSE might replace this reg
3134 with another one that has not been marked aligned.
3135 So this is really only true for frame, stack and virtual registers,
3136 which we know are always aligned and should not be adversely affected
3137 by CSE. */
3139 regno_aligned_for_load (int regno)
3141 return regno == FRAME_POINTER_REGNUM
3142 || regno == HARD_FRAME_POINTER_REGNUM
3143 || regno == STACK_POINTER_REGNUM
3144 || (regno >= FIRST_VIRTUAL_REGISTER && regno <= LAST_VIRTUAL_REGISTER);
3147 /* Return TRUE when mem is known to be 16-byte aligned. */
3149 aligned_mem_p (rtx mem)
3151 if (MEM_ALIGN (mem) >= 128)
3153 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3155 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3157 rtx p0 = XEXP (XEXP (mem, 0), 0);
3158 rtx p1 = XEXP (XEXP (mem, 0), 1);
3159 if (regno_aligned_for_load (REGNO (p0)))
3161 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3163 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3167 else if (GET_CODE (XEXP (mem, 0)) == REG)
3169 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3172 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3174 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3176 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3177 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3178 if (GET_CODE (p0) == SYMBOL_REF
3179 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3185 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3186 into its SYMBOL_REF_FLAGS. */
3188 spu_encode_section_info (tree decl, rtx rtl, int first)
3190 default_encode_section_info (decl, rtl, first);
3192 /* If a variable has a forced alignment to < 16 bytes, mark it with
3193 SYMBOL_FLAG_ALIGN1. */
3194 if (TREE_CODE (decl) == VAR_DECL
3195 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3196 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3199 /* Return TRUE if we are certain the mem refers to a complete object
3200 which is both 16-byte aligned and padded to a 16-byte boundary. This
3201 would make it safe to store with a single instruction.
3202 We guarantee the alignment and padding for static objects by aligning
3203 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3204 FIXME: We currently cannot guarantee this for objects on the stack
3205 because assign_parm_setup_stack calls assign_stack_local with the
3206 alignment of the parameter mode and in that case the alignment never
3207 gets adjusted by LOCAL_ALIGNMENT. */
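/* For example (sketch, not from the original source): a store to
   "static int x;" qualifies because DATA_ALIGNMENT pads and aligns x
   to 16 bytes, so writing the whole enclosing quadword clobbers only
   padding; a parameter spilled by assign_parm_setup_stack does not
   qualify, and must use the read-modify-write sequence in
   spu_split_store. */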
3209 store_with_one_insn_p (rtx mem)
3211 rtx addr = XEXP (mem, 0);
3212 if (GET_MODE (mem) == BLKmode)
3214 /* Only static objects. */
3215 if (GET_CODE (addr) == SYMBOL_REF)
3217 /* We use the associated declaration to make sure the access is
3218 referring to the whole object.
3219 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3220 if it is necessary. Will there be cases where one exists, and
3221 the other does not? Will there be cases where both exist, but
3222 have different types? */
3223 tree decl = MEM_EXPR (mem);
3225 && TREE_CODE (decl) == VAR_DECL
3226 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3228 decl = SYMBOL_REF_DECL (addr);
3230 && TREE_CODE (decl) == VAR_DECL
3231 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3238 spu_expand_mov (rtx * ops, enum machine_mode mode)
3240 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3243 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3245 rtx from = SUBREG_REG (ops[1]);
3246 enum machine_mode imode = GET_MODE (from);
3248 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3249 && GET_MODE_CLASS (imode) == MODE_INT
3250 && subreg_lowpart_p (ops[1]));
3252 if (GET_MODE_SIZE (imode) < 4)
3254 from = gen_rtx_SUBREG (SImode, from, 0);
3258 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3260 enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
3261 emit_insn (GEN_FCN (icode) (ops[0], from));
3264 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3268 /* At least one of the operands needs to be a register. */
3269 if ((reload_in_progress | reload_completed) == 0
3270 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3272 rtx temp = force_reg (mode, ops[1]);
3273 emit_move_insn (ops[0], temp);
3276 if (reload_in_progress || reload_completed)
3278 if (CONSTANT_P (ops[1]))
3279 return spu_split_immediate (ops);
3284 if (GET_CODE (ops[0]) == MEM)
3286 if (!spu_valid_move (ops))
3288 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3289 gen_reg_rtx (TImode)));
3293 else if (GET_CODE (ops[1]) == MEM)
3295 if (!spu_valid_move (ops))
3297 emit_insn (gen_load
3298 (ops[0], ops[1], gen_reg_rtx (TImode),
3299 gen_reg_rtx (SImode)));
3303 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3305 if (GET_CODE (ops[1]) == CONST_INT)
3307 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3308 if (val != INTVAL (ops[1]))
3310 emit_move_insn (ops[0], GEN_INT (val));
3321 /* For now, only frame registers are known to be aligned at all times.
3322 We can't trust REGNO_POINTER_ALIGN because optimization will move
3323 registers around, potentially changing an "aligned" register in an
3324 address to an unaligned register, which would result in an invalid
3325 address. */
3326 int regno = REGNO (reg);
3327 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3331 spu_split_load (rtx * ops)
3333 enum machine_mode mode = GET_MODE (ops[0]);
3334 rtx addr, load, rot, mem, p0, p1;
3337 addr = XEXP (ops[1], 0);
3341 if (GET_CODE (addr) == PLUS)
3344 aligned reg + aligned reg => lqx
3345 aligned reg + unaligned reg => lqx, rotqby
3346 aligned reg + aligned const => lqd
3347 aligned reg + unaligned const => lqd, rotqbyi
3348 unaligned reg + aligned reg => lqx, rotqby
3349 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3350 unaligned reg + aligned const => lqd, rotqby
3351 unaligned reg + unaligned const -> not allowed by legitimate address
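/* Sketch of the simplest split above (illustrative assembly, not
   emitted verbatim): an SImode load through an unaligned register R
   becomes roughly
       lqd    T, 0(R)      # fetch the enclosing quadword
       rotqby T, T, R      # rotate the addressed word into slot 0
   matching the "unaligned reg" rows of the table. */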
3353 p0 = XEXP (addr, 0);
3354 p1 = XEXP (addr, 1);
3355 if (reg_align (p0) < 128)
3357 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3359 emit_insn (gen_addsi3 (ops[3], p0, p1));
3367 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3369 rot_amt = INTVAL (p1) & 15;
3370 p1 = GEN_INT (INTVAL (p1) & -16);
3371 addr = gen_rtx_PLUS (SImode, p0, p1);
3373 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3377 else if (GET_CODE (addr) == REG)
3379 if (reg_align (addr) < 128)
3382 else if (GET_CODE (addr) == CONST)
3384 if (GET_CODE (XEXP (addr, 0)) == PLUS
3385 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3386 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3388 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3390 addr = gen_rtx_CONST (Pmode,
3391 gen_rtx_PLUS (Pmode,
3392 XEXP (XEXP (addr, 0), 0),
3393 GEN_INT (rot_amt & -16)));
3395 addr = XEXP (XEXP (addr, 0), 0);
3400 else if (GET_CODE (addr) == CONST_INT)
3402 rot_amt = INTVAL (addr);
3403 addr = GEN_INT (rot_amt & -16);
3405 else if (!ALIGNED_SYMBOL_REF_P (addr))
3408 if (GET_MODE_SIZE (mode) < 4)
3409 rot_amt += GET_MODE_SIZE (mode) - 4;
3415 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3422 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3423 mem = change_address (ops[1], TImode, addr);
3425 emit_insn (gen_movti (load, mem));
3428 emit_insn (gen_rotqby_ti (load, load, rot));
3430 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3432 if (reload_completed)
3433 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3435 emit_insn (gen_spu_convert (ops[0], load));
3439 spu_split_store (rtx * ops)
3441 enum machine_mode mode = GET_MODE (ops[0]);
3444 rtx addr, p0, p1, p1_lo, smem;
3448 addr = XEXP (ops[0], 0);
3450 if (GET_CODE (addr) == PLUS)
3453 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3454 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3455 aligned reg + aligned const => lqd, c?d, shuf, stqx
3456 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3457 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3458 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3459 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3460 unaligned reg + unaligned const -> not allowed by legitimate address
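/* Sketch of the resulting read-modify-write (illustrative assembly,
   not emitted verbatim): an SImode store of S through address A is
   roughly
       lqd   T, 0(A)       # load the enclosing quadword
       cwd   M, 0(A)       # controls for inserting a word at A
       shufb T, S, T, M    # merge S into the old quadword
       stqd  T, 0(A)       # store the quadword back
   as listed in the table above. */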
3463 p0 = XEXP (addr, 0);
3464 p1 = p1_lo = XEXP (addr, 1);
3465 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3467 p1_lo = GEN_INT (INTVAL (p1) & 15);
3468 p1 = GEN_INT (INTVAL (p1) & -16);
3469 addr = gen_rtx_PLUS (SImode, p0, p1);
3472 else if (GET_CODE (addr) == REG)
3476 p1 = p1_lo = const0_rtx;
3481 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3482 p1 = 0; /* aform doesn't use p1 */
3484 if (ALIGNED_SYMBOL_REF_P (addr))
3486 else if (GET_CODE (addr) == CONST)
3488 if (GET_CODE (XEXP (addr, 0)) == PLUS
3489 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3490 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3492 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3494 addr = gen_rtx_CONST (Pmode,
3495 gen_rtx_PLUS (Pmode,
3496 XEXP (XEXP (addr, 0), 0),
3497 GEN_INT (v & -16)));
3499 addr = XEXP (XEXP (addr, 0), 0);
3500 p1_lo = GEN_INT (v & 15);
3503 else if (GET_CODE (addr) == CONST_INT)
3505 p1_lo = GEN_INT (INTVAL (addr) & 15);
3506 addr = GEN_INT (INTVAL (addr) & -16);
3510 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3512 scalar = store_with_one_insn_p (ops[0]);
3515 /* We could copy the flags from the ops[0] MEM to lmem here;
3516 we don't because we want this load to be optimized away if
3517 possible, and copying the flags will prevent that in certain
3518 cases, e.g. consider the volatile flag. */
3520 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3521 set_mem_alias_set (lmem, 0);
3522 emit_insn (gen_movti (reg, lmem));
3524 if (!p0 || reg_align (p0) >= 128)
3525 p0 = stack_pointer_rtx;
3529 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3530 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3532 else if (reload_completed)
3534 if (GET_CODE (ops[1]) == REG)
3535 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3536 else if (GET_CODE (ops[1]) == SUBREG)
3537 emit_move_insn (reg,
3538 gen_rtx_REG (GET_MODE (reg),
3539 REGNO (SUBREG_REG (ops[1]))));
3545 if (GET_CODE (ops[1]) == REG)
3546 emit_insn (gen_spu_convert (reg, ops[1]));
3547 else if (GET_CODE (ops[1]) == SUBREG)
3548 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3553 if (GET_MODE_SIZE (mode) < 4 && scalar)
3554 emit_insn (gen_shlqby_ti
3555 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3557 smem = change_address (ops[0], TImode, addr);
3558 /* We can't use the previous alias set because the memory has changed
3559 size and can potentially overlap objects of other types. */
3560 set_mem_alias_set (smem, 0);
3562 emit_insn (gen_movti (smem, reg));
3565 /* Return TRUE if X is MEM which is a struct member reference
3566 and the member can safely be loaded and stored with a single
3567 instruction because it is padded. */
3569 mem_is_padded_component_ref (rtx x)
3571 tree t = MEM_EXPR (x);
3573 if (!t || TREE_CODE (t) != COMPONENT_REF)
3575 t = TREE_OPERAND (t, 1);
3576 if (!t || TREE_CODE (t) != FIELD_DECL
3577 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3579 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3580 r = DECL_FIELD_CONTEXT (t);
3581 if (!r || TREE_CODE (r) != RECORD_TYPE)
3583 /* Make sure they are the same mode */
3584 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3586 /* If there are no following fields then the field alignment assures
3587 the structure is padded to the alignment which means this field is
3588 padded too. */
3589 if (TREE_CHAIN (t) == 0)
3591 /* If the following field is also aligned then this field will be
3592 padded. */
3593 t = TREE_CHAIN (t);
3594 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3599 /* Parse the -mfixed-range= option string. */
3601 fix_range (const char *const_str)
3604 char *str, *dash, *comma;
3606 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3607 REG2 are either register names or register numbers. The effect
3608 of this option is to mark the registers in the range from REG1 to
3609 REG2 as ``fixed'' so they won't be used by the compiler. */
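/* Example usage (hypothetical register numbers):
   -mfixed-range=80-85,90-90 marks registers 80 through 85 and
   register 90 as fixed and call-used. */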
3611 i = strlen (const_str);
3612 str = (char *) alloca (i + 1);
3613 memcpy (str, const_str, i + 1);
3617 dash = strchr (str, '-');
3620 warning (0, "value of -mfixed-range must have form REG1-REG2");
3624 comma = strchr (dash + 1, ',');
3628 first = decode_reg_name (str);
3631 warning (0, "unknown register name: %s", str);
3635 last = decode_reg_name (dash + 1);
3638 warning (0, "unknown register name: %s", dash + 1);
3646 warning (0, "%s-%s is an empty range", str, dash + 1);
3650 for (i = first; i <= last; ++i)
3651 fixed_regs[i] = call_used_regs[i] = 1;
3662 spu_valid_move (rtx * ops)
3664 enum machine_mode mode = GET_MODE (ops[0]);
3665 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3668 /* init_expr_once tries to recog against load and store insns to set
3669 the direct_load[] and direct_store[] arrays. We always want to
3670 consider those loads and stores valid. init_expr_once is called in
3671 the context of a dummy function which does not have a decl. */
3672 if (cfun->decl == 0)
3675 /* Don't allow loads/stores which would require more than 1 insn.
3676 During and after reload we assume loads and stores only take 1
3677 insn. */
3678 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3680 if (GET_CODE (ops[0]) == MEM
3681 && (GET_MODE_SIZE (mode) < 4
3682 || !(store_with_one_insn_p (ops[0])
3683 || mem_is_padded_component_ref (ops[0]))))
3685 if (GET_CODE (ops[1]) == MEM
3686 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3692 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3693 can be generated using the fsmbi instruction. */
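/* fsmbi expands a 16-bit immediate into 16 bytes, one mask bit per
   byte, so any constant whose bytes are all 0x00 or 0xff qualifies.
   E.g. (sketch): SImode 0xff00ff00, splatted to ff 00 ff 00 ...
   across the register, needs a single fsmbi. */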
3695 fsmbi_const_p (rtx x)
3699 /* We can always choose DImode for CONST_INT because the high bits
3700 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3701 enum immediate_class c = classify_immediate (x, DImode);
3702 return c == IC_FSMBI;
3707 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3708 can be generated using the cbd, chd, cwd or cdd instruction. */
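/* These "generate controls" insns build shufb patterns that insert a
   byte/halfword/word/doubleword into a quadword.  E.g. (sketch): a
   4-byte insert at offset 4 uses the control bytes
       10 11 12 13  00 01 02 03  18 19 1a 1b  1c 1d 1e 1f
   copying the old quadword everywhere except bytes 4-7, which take
   the new word; compare gen_cpat_const below. */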
3710 cpat_const_p (rtx x, enum machine_mode mode)
3714 enum immediate_class c = classify_immediate (x, mode);
3715 return c == IC_CPAT;
3721 gen_cpat_const (rtx * ops)
3723 unsigned char dst[16];
3724 int i, offset, shift, isize;
3725 if (GET_CODE (ops[3]) != CONST_INT
3726 || GET_CODE (ops[2]) != CONST_INT
3727 || (GET_CODE (ops[1]) != CONST_INT
3728 && GET_CODE (ops[1]) != REG))
3730 if (GET_CODE (ops[1]) == REG
3731 && (!REG_POINTER (ops[1])
3732 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3735 for (i = 0; i < 16; i++)
3737 isize = INTVAL (ops[3]);
3740 else if (isize == 2)
3744 offset = (INTVAL (ops[2]) +
3745 (GET_CODE (ops[1]) ==
3746 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3747 for (i = 0; i < isize; i++)
3748 dst[offset + i] = i + shift;
3749 return array_to_constant (TImode, dst);
3752 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3753 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3754 than 16 bytes, the value is repeated across the rest of the array. */
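/* E.g. (sketch): the HImode constant 0x1234 produces
   arr = { 12 34 12 34 12 34 12 34 12 34 12 34 12 34 12 34 }. */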
3756 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3761 memset (arr, 0, 16);
3762 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3763 if (GET_CODE (x) == CONST_INT
3764 || (GET_CODE (x) == CONST_DOUBLE
3765 && (mode == SFmode || mode == DFmode)))
3767 gcc_assert (mode != VOIDmode && mode != BLKmode);
3769 if (GET_CODE (x) == CONST_DOUBLE)
3770 val = const_double_to_hwint (x);
3773 first = GET_MODE_SIZE (mode) - 1;
3774 for (i = first; i >= 0; i--)
3776 arr[i] = val & 0xff;
3779 /* Splat the constant across the whole array. */
3780 for (j = 0, i = first + 1; i < 16; i++)
3783 j = (j == first) ? 0 : j + 1;
3786 else if (GET_CODE (x) == CONST_DOUBLE)
3788 val = CONST_DOUBLE_LOW (x);
3789 for (i = 15; i >= 8; i--)
3791 arr[i] = val & 0xff;
3794 val = CONST_DOUBLE_HIGH (x);
3795 for (i = 7; i >= 0; i--)
3797 arr[i] = val & 0xff;
3801 else if (GET_CODE (x) == CONST_VECTOR)
3805 mode = GET_MODE_INNER (mode);
3806 units = CONST_VECTOR_NUNITS (x);
3807 for (i = 0; i < units; i++)
3809 elt = CONST_VECTOR_ELT (x, i);
3810 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
3812 if (GET_CODE (elt) == CONST_DOUBLE)
3813 val = const_double_to_hwint (elt);
3816 first = GET_MODE_SIZE (mode) - 1;
3817 if (first + i * GET_MODE_SIZE (mode) > 16)
3819 for (j = first; j >= 0; j--)
3821 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
3831 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
3832 smaller than 16 bytes, use the bytes that would represent that value
3833 in a register, e.g., for QImode return the value of arr[3]. */
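/* E.g. (sketch): with arr = { 00 01 02 03 04 ... }, QImode yields
   0x03 (the low byte of the preferred slot) and SImode yields
   0x00010203. */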
3835 array_to_constant (enum machine_mode mode, unsigned char arr[16])
3837 enum machine_mode inner_mode;
3839 int units, size, i, j, k;
3842 if (GET_MODE_CLASS (mode) == MODE_INT
3843 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3845 j = GET_MODE_SIZE (mode);
3846 i = j < 4 ? 4 - j : 0;
3847 for (val = 0; i < j; i++)
3848 val = (val << 8) | arr[i];
3849 val = trunc_int_for_mode (val, mode);
3850 return GEN_INT (val);
3856 for (i = high = 0; i < 8; i++)
3857 high = (high << 8) | arr[i];
3858 for (i = 8, val = 0; i < 16; i++)
3859 val = (val << 8) | arr[i];
3860 return immed_double_const (val, high, TImode);
3864 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3865 val = trunc_int_for_mode (val, SImode);
3866 return hwint_to_const_double (SFmode, val);
3870 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3872 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
3873 return hwint_to_const_double (DFmode, val);
3876 if (!VECTOR_MODE_P (mode))
3879 units = GET_MODE_NUNITS (mode);
3880 size = GET_MODE_UNIT_SIZE (mode);
3881 inner_mode = GET_MODE_INNER (mode);
3882 v = rtvec_alloc (units);
3884 for (k = i = 0; i < units; ++i)
3887 for (j = 0; j < size; j++, k++)
3888 val = (val << 8) | arr[k];
3890 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
3891 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
3893 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
3898 return gen_rtx_CONST_VECTOR (mode, v);
3902 reloc_diagnostic (rtx x)
3904 tree loc_decl, decl = 0;
3906 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
3909 if (GET_CODE (x) == SYMBOL_REF)
3910 decl = SYMBOL_REF_DECL (x);
3911 else if (GET_CODE (x) == CONST
3912 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3913 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
3915 /* SYMBOL_REF_DECL is not necessarily a DECL. */
3916 if (decl && !DECL_P (decl))
3919 /* We use last_assemble_variable_decl to get line information. It's
3920 not always going to be right and might not even be close, but it will
3921 be right for the more common cases. */
3922 if (!last_assemble_variable_decl)
3925 loc_decl = last_assemble_variable_decl;
3927 /* The decl could be a string constant. */
3928 if (decl && DECL_P (decl))
3929 msg = "%Jcreating run-time relocation for %qD";
3931 msg = "creating run-time relocation";
3933 if (TARGET_WARN_RELOC)
3934 warning (0, msg, loc_decl, decl);
3936 error (msg, loc_decl, decl);
3939 /* Hook into assemble_integer so we can generate an error for run-time
3940 relocations. The SPU ABI disallows them. */
3942 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
3944 /* By default run-time relocations aren't supported, but we allow them
3945 in case users support them in their own run-time loader, and we provide
3946 a warning for those users that don't. */
3947 if ((GET_CODE (x) == SYMBOL_REF)
3948 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
3949 reloc_diagnostic (x);
3951 return default_assemble_integer (x, size, aligned_p);
3955 spu_asm_globalize_label (FILE * file, const char *name)
3957 fputs ("\t.global\t", file);
3958 assemble_name (file, name);
3963 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
3965 enum machine_mode mode = GET_MODE (x);
3966 int cost = COSTS_N_INSNS (2);
3968 /* Folding to a CONST_VECTOR will use extra space but there might
3969 be only a small savings in cycles. We'd like to use a CONST_VECTOR
3970 only if it allows us to fold away multiple insns. Changing the cost
3971 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
3972 because this cost will only be compared against a single insn.
3973 if (code == CONST_VECTOR)
3974 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
3975 */
3977 /* Use defaults for float operations. Not accurate but good enough. */
3980 *total = COSTS_N_INSNS (13);
3985 *total = COSTS_N_INSNS (6);
3991 if (satisfies_constraint_K (x))
3993 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
3994 *total = COSTS_N_INSNS (1);
3996 *total = COSTS_N_INSNS (3);
4000 *total = COSTS_N_INSNS (3);
4005 *total = COSTS_N_INSNS (0);
4009 *total = COSTS_N_INSNS (5);
4013 case FLOAT_TRUNCATE:
4015 case UNSIGNED_FLOAT:
4018 *total = COSTS_N_INSNS (7);
4024 *total = COSTS_N_INSNS (9);
4031 GET_CODE (XEXP (x, 0)) ==
4032 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4033 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4035 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4037 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4038 cost = COSTS_N_INSNS (14);
4039 if ((val & 0xffff) == 0)
4040 cost = COSTS_N_INSNS (9);
4041 else if (val > 0 && val < 0x10000)
4042 cost = COSTS_N_INSNS (11);
4051 *total = COSTS_N_INSNS (20);
4058 *total = COSTS_N_INSNS (4);
4061 if (XINT (x, 1) == UNSPEC_CONVERT)
4062 *total = COSTS_N_INSNS (0);
4064 *total = COSTS_N_INSNS (4);
4067 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4068 if (GET_MODE_CLASS (mode) == MODE_INT
4069 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4070 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4071 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4077 spu_eh_return_filter_mode (void)
4079 /* We would like this to be SImode, but sjlj exceptions seem to work
4080 only with word_mode. */
4084 /* Decide whether we can make a sibling call to a function. DECL is the
4085 declaration of the function being targeted by the call and EXP is the
4086 CALL_EXPR representing the call. */
4088 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4090 return decl && !TARGET_LARGE_MEM;
4093 /* We need to correctly update the back chain pointer and the Available
4094 Stack Size (which is in the second slot of the sp register). */
4096 spu_allocate_stack (rtx op0, rtx op1)
4099 rtx chain = gen_reg_rtx (V4SImode);
4100 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4101 rtx sp = gen_reg_rtx (V4SImode);
4102 rtx splatted = gen_reg_rtx (V4SImode);
4103 rtx pat = gen_reg_rtx (TImode);
4105 /* copy the back chain so we can save it back again. */
4106 emit_move_insn (chain, stack_bot);
4108 op1 = force_reg (SImode, op1);
4110 v = 0x1020300010203ll;
4111 emit_move_insn (pat, immed_double_const (v, v, TImode));
4112 emit_insn (gen_shufb (splatted, op1, op1, pat));
4114 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4115 emit_insn (gen_subv4si3 (sp, sp, splatted));
4117 if (flag_stack_check)
4119 rtx avail = gen_reg_rtx(SImode);
4120 rtx result = gen_reg_rtx(SImode);
4121 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4122 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4123 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4126 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4128 emit_move_insn (stack_bot, chain);
4130 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4134 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4136 static unsigned char arr[16] =
4137 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4138 rtx temp = gen_reg_rtx (SImode);
4139 rtx temp2 = gen_reg_rtx (SImode);
4140 rtx temp3 = gen_reg_rtx (V4SImode);
4141 rtx temp4 = gen_reg_rtx (V4SImode);
4142 rtx pat = gen_reg_rtx (TImode);
4143 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4145 /* Restore the backchain from the first word, sp from the second. */
4146 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4147 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4149 emit_move_insn (pat, array_to_constant (TImode, arr));
4151 /* Compute Available Stack Size for sp */
4152 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4153 emit_insn (gen_shufb (temp3, temp, temp, pat));
4155 /* Compute Available Stack Size for back chain */
4156 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4157 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4158 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4160 emit_insn (gen_addv4si3 (sp, sp, temp3));
4161 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4165 spu_init_libfuncs (void)
4167 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4168 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4169 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4170 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4171 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4172 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4173 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4174 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4175 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4176 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4177 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4179 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4180 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
4183 /* Make a subreg, stripping any existing subreg. We could possibly just
4184 call simplify_subreg, but in this case we know what we want. */
4186 spu_gen_subreg (enum machine_mode mode, rtx x)
4188 if (GET_CODE (x) == SUBREG)
4190 if (GET_MODE (x) == mode)
4192 return gen_rtx_SUBREG (mode, x, 0);
4196 spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
4198 return (TYPE_MODE (type) == BLKmode
4200 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4201 || int_size_in_bytes (type) >
4202 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4205 /* Create the built-in types and functions */
4207 struct spu_builtin_description spu_builtins[] = {
4208 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4209 {fcode, icode, name, type, params, NULL_TREE},
4210 #include "spu-builtins.def"
4215 spu_init_builtins (void)
4217 struct spu_builtin_description *d;
4220 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4221 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4222 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4223 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4224 V4SF_type_node = build_vector_type (float_type_node, 4);
4225 V2DF_type_node = build_vector_type (double_type_node, 2);
4227 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4228 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4229 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4230 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4232 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
4234 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4235 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4236 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4237 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4238 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4239 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4240 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4241 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4242 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4243 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4244 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4245 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4247 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4248 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4249 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4250 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4251 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4252 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4253 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4254 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4256 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4257 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4259 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4261 spu_builtin_types[SPU_BTI_PTR] =
4262 build_pointer_type (build_qualified_type
4264 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4266 /* For each builtin we build a new prototype. The tree code will make
4267 sure nodes are shared. */
4268 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4271 char name[64]; /* build_function will make a copy. */
4277 /* find last parm */
4278 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4284 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4286 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4288 sprintf (name, "__builtin_%s", d->name);
4290 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4296 spu_safe_dma (HOST_WIDE_INT channel)
4298 return (channel >= 21 && channel <= 27);
4302 spu_builtin_splats (rtx ops[])
4304 enum machine_mode mode = GET_MODE (ops[0]);
4305 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4307 unsigned char arr[16];
4308 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4309 emit_move_insn (ops[0], array_to_constant (mode, arr));
4311 else if (GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4313 rtvec v = rtvec_alloc (4);
4314 RTVEC_ELT (v, 0) = ops[1];
4315 RTVEC_ELT (v, 1) = ops[1];
4316 RTVEC_ELT (v, 2) = ops[1];
4317 RTVEC_ELT (v, 3) = ops[1];
4318 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4322 rtx reg = gen_reg_rtx (TImode);
4324 if (GET_CODE (ops[1]) != REG
4325 && GET_CODE (ops[1]) != SUBREG)
4326 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
4332 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4338 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4343 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4348 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4354 emit_move_insn (reg, shuf);
4355 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4360 spu_builtin_extract (rtx ops[])
4362 enum machine_mode mode;
4365 mode = GET_MODE (ops[1]);
4367 if (GET_CODE (ops[2]) == CONST_INT)
4372 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4375 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4378 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4381 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4384 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4387 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4395 from = spu_gen_subreg (TImode, ops[1]);
4396 rot = gen_reg_rtx (TImode);
4397 tmp = gen_reg_rtx (SImode);
4402 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4405 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4406 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4410 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4414 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4419 emit_insn (gen_rotqby_ti (rot, from, tmp));
4421 emit_insn (gen_spu_convert (ops[0], rot));
4425 spu_builtin_insert (rtx ops[])
4427 enum machine_mode mode = GET_MODE (ops[0]);
4428 enum machine_mode imode = GET_MODE_INNER (mode);
4429 rtx mask = gen_reg_rtx (TImode);
4432 if (GET_CODE (ops[3]) == CONST_INT)
4433 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4436 offset = gen_reg_rtx (SImode);
4437 emit_insn (gen_mulsi3
4438 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4441 (mask, stack_pointer_rtx, offset,
4442 GEN_INT (GET_MODE_SIZE (imode))));
4443 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4447 spu_builtin_promote (rtx ops[])
4449 enum machine_mode mode, imode;
4450 rtx rot, from, offset;
4453 mode = GET_MODE (ops[0]);
4454 imode = GET_MODE_INNER (mode);
4456 from = gen_reg_rtx (TImode);
4457 rot = spu_gen_subreg (TImode, ops[0]);
4459 emit_insn (gen_spu_convert (from, ops[1]));
4461 if (GET_CODE (ops[2]) == CONST_INT)
4463 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4464 if (GET_MODE_SIZE (imode) < 4)
4465 pos += 4 - GET_MODE_SIZE (imode);
4466 offset = GEN_INT (pos & 15);
4470 offset = gen_reg_rtx (SImode);
4474 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4477 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4478 emit_insn (gen_addsi3 (offset, offset, offset));
4482 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4483 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4487 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4493 emit_insn (gen_rotqby_ti (rot, from, offset));
4497 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4499 rtx shuf = gen_reg_rtx (V4SImode);
4500 rtx insn = gen_reg_rtx (V4SImode);
4505 fnaddr = force_reg (SImode, fnaddr);
4506 cxt = force_reg (SImode, cxt);
4508 if (TARGET_LARGE_MEM)
4510 rtx rotl = gen_reg_rtx (V4SImode);
4511 rtx mask = gen_reg_rtx (V4SImode);
4512 rtx bi = gen_reg_rtx (SImode);
4513 unsigned char shufa[16] = {
4514 2, 3, 0, 1, 18, 19, 16, 17,
4515 0, 1, 2, 3, 16, 17, 18, 19
4517 unsigned char insna[16] = {
4519 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4521 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4524 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4525 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4527 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4528 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4529 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4530 emit_insn (gen_selb (insn, insnc, rotl, mask));
4532 mem = memory_address (Pmode, tramp);
4533 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4535 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4536 mem = memory_address (Pmode, plus_constant (tramp, 16));
4537 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4541 rtx scxt = gen_reg_rtx (SImode);
4542 rtx sfnaddr = gen_reg_rtx (SImode);
4543 unsigned char insna[16] = {
4544 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4550 shufc = gen_reg_rtx (TImode);
4551 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4553 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4554 fits 18 bits and the last 4 are zeros. This will be true if
4555 the stack pointer is initialized to 0x3fff0 at program start;
4556 otherwise the ila instruction will be garbage. */
4558 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4559 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4561 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4562 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4563 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4565 mem = memory_address (Pmode, tramp);
4566 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4569 emit_insn (gen_sync ());
4573 spu_expand_sign_extend (rtx ops[])
4575 unsigned char arr[16];
4576 rtx pat = gen_reg_rtx (TImode);
4579 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4580 if (GET_MODE (ops[1]) == QImode)
4582 sign = gen_reg_rtx (HImode);
4583 emit_insn (gen_extendqihi2 (sign, ops[1]));
4584 for (i = 0; i < 16; i++)
4590 for (i = 0; i < 16; i++)
4592 switch (GET_MODE (ops[1]))
4595 sign = gen_reg_rtx (SImode);
4596 emit_insn (gen_extendhisi2 (sign, ops[1]));
4598 arr[last - 1] = 0x02;
4601 sign = gen_reg_rtx (SImode);
4602 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4603 for (i = 0; i < 4; i++)
4604 arr[last - i] = 3 - i;
4607 sign = gen_reg_rtx (SImode);
4608 c = gen_reg_rtx (SImode);
4609 emit_insn (gen_spu_convert (c, ops[1]));
4610 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4611 for (i = 0; i < 8; i++)
4612 arr[last - i] = 7 - i;
4618 emit_move_insn (pat, array_to_constant (TImode, arr));
4619 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4622 /* Expand vector initialization. If there are any constant parts,
4623 load constant parts first. Then load any non-constant parts. */
4625 spu_expand_vector_init (rtx target, rtx vals)
4627 enum machine_mode mode = GET_MODE (target);
4628 int n_elts = GET_MODE_NUNITS (mode);
4630 bool all_same = true;
4631 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
4634 first = XVECEXP (vals, 0, 0);
4635 for (i = 0; i < n_elts; ++i)
4637 x = XVECEXP (vals, 0, i);
4638 if (!CONSTANT_P (x))
4642 if (first_constant == NULL_RTX)
4645 if (i > 0 && !rtx_equal_p (x, first))
4649 /* if all elements are the same, use splats to repeat elements */
4652 if (!CONSTANT_P (first)
4653 && !register_operand (first, GET_MODE (x)))
4654 first = force_reg (GET_MODE (first), first);
4655 emit_insn (gen_spu_splats (target, first));
4659 /* load constant parts */
4660 if (n_var != n_elts)
4664 emit_move_insn (target,
4665 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4669 rtx constant_parts_rtx = copy_rtx (vals);
4671 gcc_assert (first_constant != NULL_RTX);
4672 /* fill empty slots with the first constant; this increases
4673 our chance of using splats in the recursive call below. */
4674 for (i = 0; i < n_elts; ++i)
4675 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4676 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4678 spu_expand_vector_init (target, constant_parts_rtx);
4682 /* load variable parts */
4685 rtx insert_operands[4];
4687 insert_operands[0] = target;
4688 insert_operands[2] = target;
4689 for (i = 0; i < n_elts; ++i)
4691 x = XVECEXP (vals, 0, i);
4692 if (!CONSTANT_P (x))
4694 if (!register_operand (x, GET_MODE (x)))
4695 x = force_reg (GET_MODE (x), x);
4696 insert_operands[1] = x;
4697 insert_operands[3] = GEN_INT (i);
4698 spu_builtin_insert (insert_operands);
4705 spu_force_reg (enum machine_mode mode, rtx op)
4708 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
4710 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
4711 || GET_MODE (op) == BLKmode)
4712 return force_reg (mode, convert_to_mode (mode, op, 0));
4716 r = force_reg (GET_MODE (op), op);
4717 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
4719 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
4724 x = gen_reg_rtx (mode);
4725 emit_insn (gen_spu_convert (x, r));
4730 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
4732 HOST_WIDE_INT v = 0;
4734 /* Check the range of immediate operands. */
4735 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
4737 int range = p - SPU_BTI_7;
4738 if (!CONSTANT_P (op)
4739 || (GET_CODE (op) == CONST_INT
4740 && (INTVAL (op) < spu_builtin_range[range].low
4741 || INTVAL (op) > spu_builtin_range[range].high)))
4742 error ("%s expects an integer literal in the range [%d, %d].",
4744 spu_builtin_range[range].low, spu_builtin_range[range].high);
4746 if (GET_CODE (op) == CONST
4747 && (GET_CODE (XEXP (op, 0)) == PLUS
4748 || GET_CODE (XEXP (op, 0)) == MINUS))
4750 v = INTVAL (XEXP (XEXP (op, 0), 1));
4751 op = XEXP (XEXP (op, 0), 0);
4753 else if (GET_CODE (op) == CONST_INT)
4762 /* This is only used in lqa and stqa. Even though the insns
4763 encode 16 bits of the address (all but the 2 least
4764 significant), only 14 bits are used because it is masked to
4765 be 16 byte aligned. */
4769 /* This is used for lqr and stqr. */
4776 if (GET_CODE (op) == LABEL_REF
4777 || (GET_CODE (op) == SYMBOL_REF
4778 && SYMBOL_REF_FUNCTION_P (op))
4779 || (INTVAL (op) & ((1 << lsbits) - 1)) != 0)
4780 warning (0, "%d least significant bits of %s are ignored.", lsbits,
4787 expand_builtin_args (struct spu_builtin_description *d, tree arglist,
4788 rtx target, rtx ops[])
4790 enum insn_code icode = d->icode;
4793 /* Expand the arguments into rtl. */
4795 if (d->parm[0] != SPU_BTI_VOID)
4798 for (; i < insn_data[icode].n_operands; i++)
4800 tree arg = TREE_VALUE (arglist);
4803 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
4804 arglist = TREE_CHAIN (arglist);
4809 spu_expand_builtin_1 (struct spu_builtin_description *d,
4810 tree arglist, rtx target)
4814 enum insn_code icode = d->icode;
4815 enum machine_mode mode, tmode;
4819 /* Set up ops[] with values from arglist. */
4820 expand_builtin_args (d, arglist, target, ops);
4822 /* Handle the target operand which must be operand 0. */
4824 if (d->parm[0] != SPU_BTI_VOID)
4827 /* We prefer the mode specified for the match_operand; otherwise
4828 use the mode from the builtin function prototype. */
4829 tmode = insn_data[d->icode].operand[0].mode;
4830 if (tmode == VOIDmode)
4831 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
4833 /* Try to use target because not using it can lead to extra copies
4834 and when we are using all of the registers extra copies lead
4835 to spilling. */
4836 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
4839 target = ops[0] = gen_reg_rtx (tmode);
4841 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
4847 /* Ignore align_hint, but still expand its args in case they have
4848 side effects. */
4849 if (icode == CODE_FOR_spu_align_hint)
4852 /* Handle the rest of the operands. */
4853 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
4855 if (insn_data[d->icode].operand[i].mode != VOIDmode)
4856 mode = insn_data[d->icode].operand[i].mode;
4858 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
4860 /* mode can be VOIDmode here for labels */
4862 /* For specific intrinsics with an immediate operand, e.g.,
4863 si_ai(), we sometimes need to convert the scalar argument to a
4864 vector argument by splatting the scalar. */
4865 if (VECTOR_MODE_P (mode)
4866 && (GET_CODE (ops[i]) == CONST_INT
4867 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
4868 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
4870 if (GET_CODE (ops[i]) == CONST_INT)
4871 ops[i] = spu_const (mode, INTVAL (ops[i]));
4874 rtx reg = gen_reg_rtx (mode);
4875 enum machine_mode imode = GET_MODE_INNER (mode);
4876 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
4877 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
4878 if (imode != GET_MODE (ops[i]))
4879 ops[i] = convert_to_mode (imode, ops[i],
4880 TYPE_UNSIGNED (spu_builtin_types
4882 emit_insn (gen_spu_splats (reg, ops[i]));
4887 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
4888 ops[i] = spu_force_reg (mode, ops[i]);
4890 spu_check_builtin_parm (d, ops[i], d->parm[p]);
4893 switch (insn_data[icode].n_operands)
4896 pat = GEN_FCN (icode) (0);
4899 pat = GEN_FCN (icode) (ops[0]);
4902 pat = GEN_FCN (icode) (ops[0], ops[1]);
4905 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
4908 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
4911 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
4914 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
4923 if (d->type == B_CALL || d->type == B_BISLED)
4924 emit_call_insn (pat);
4925 else if (d->type == B_JUMP)
4927 emit_jump_insn (pat);
4933 return_type = spu_builtin_types[d->parm[0]];
4934 if (d->parm[0] != SPU_BTI_VOID
4935 && GET_MODE (target) != TYPE_MODE (return_type))
4937 /* target is the return value. It should always have the mode of
4938 the builtin function prototype. */
4939 target = spu_force_reg (TYPE_MODE (return_type), target);
4946 spu_expand_builtin (tree exp,
4948 rtx subtarget ATTRIBUTE_UNUSED,
4949 enum machine_mode mode ATTRIBUTE_UNUSED,
4950 int ignore ATTRIBUTE_UNUSED)
4952 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
4953 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
4954 tree arglist = TREE_OPERAND (exp, 1);
4955 struct spu_builtin_description *d;
4957 if (fcode < NUM_SPU_BUILTINS)
4959 d = &spu_builtins[fcode];
4961 return spu_expand_builtin_1 (d, arglist, target);