/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tm-constrs.h"
/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */
  /* A 16-byte type. (Implemented with V16QI_type_node) */

  /* These all correspond to intSI_type_node */

  /* These correspond to the standard types */

  SPU_BTI_MAX
};
#define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
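/* Illustration (not from the original source): each entry gives the
   inclusive bounds an immediate operand must satisfy for the
   corresponding builtin type index.  For example, a value checked
   against SPU_BTI_S10 must satisfy

     -0x200 <= INTVAL (op) && INTVAL (op) <= 0x1ff

   i.e. it must fit the signed 10-bit immediate field used by
   instructions such as "ai", while SPU_BTI_U7 accepts only 0..0x7f. */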
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs. */
static void spu_init_builtins (void);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance, sbitmap blocks);
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
				    enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static void spu_sched_init_global (FILE *, int, int);
static void spu_sched_init (FILE *, int, int);
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
					    const_tree type, unsigned char named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
				      gimple_seq * pre_p, gimple_seq * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int mem_is_padded_component_ref (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
				    int *total, bool speed);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);
extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
/* Which instruction set architecture to use. */

/* Which CPU we are tuning for. */
/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);
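/* Worked example of the default above: 8 insns * 4 bytes = 32 bytes
   between hint and branch; allowing 2 nops (2 * 4 bytes) leaves a
   default spu_hint_dist of 24 bytes, i.e. 6 already-present insns. */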
/* Determines whether we run variable tracking in machine dependent
   reorg. */
static int spu_flag_var_tracking;
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

static enum machine_mode spu_unwind_word_mode (void);

static enum machine_mode
spu_libgcc_cmp_return_mode (void);

static enum machine_mode
spu_libgcc_shift_count_mode (void);
/* TARGET overrides. */

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

const struct attribute_spec spu_attribute_table[];
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START asm_file_start

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

struct gcc_target targetm = TARGET_INITIALIZER;
void
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params. */
  MAX_PENDING_LIST_LENGTH = 128;

  /* With so many registers this is better on by default. */
  flag_rename_registers = 1;
}
/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed. */
void
spu_override_options (void)
{
  /* Small loops will be completely peeled at -O3.  For SPU it is more
     important to keep code small by default. */
  if (!flag_unroll_loops && !flag_peel_loops
      && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue. */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);
  /* Determine processor architectural level. */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("unknown architecture '%s'", &spu_arch_string[0]);
    }

  /* Determine processor to tune for. */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("unknown architecture '%s'", &spu_tune_string[0]);
    }
  /* Change defaults according to the processor architecture. */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround. */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */

/* Table of machine attributes. */
const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",       0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector",  0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,          0, 0, false, false, false, NULL }
};
/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported. */
static unsigned char
spu_scalar_mode_supported_p (enum machine_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details. */
static unsigned char
spu_vector_mode_supported_p (enum machine_mode mode)
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct. */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
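/* Illustration (not from the original source): (subreg:SI (reg:HI)) is
   accepted because both modes are at most 4 bytes, and a 16-byte inner
   mode under a 16-byte outer mode is accepted as well, but
   (subreg:DI (reg:SI)) is rejected since the 4-byte and 8-byte sizes
   straddle the 4/16-byte boundary tested above. */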
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset. */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;

  /* Strip any paradoxical SUBREG. */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }

  /* If it is smaller than SI, assure a SUBREG. */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }

  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT src_size, dst_size;
  enum machine_mode src_mode, dst_mode;
  rtx dst = ops[0], src = ops[1];
  rtx s, pat;
  int icode;

  dst = adjust_operand (ops[0], 0);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  src = adjust_operand (src, &start);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  if (start > 0)
    {
      s = gen_reg_rtx (src_mode);
      switch (src_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
	  break;
	default:
	  abort ();
	}
      src = s;
    }

  if (width < src_size)
    {
      switch (src_mode)
	{
	case SImode:
	  icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
	  break;
	case DImode:
	  icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
	  break;
	case TImode:
	  icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
	  break;
	default:
	  abort ();
	}
      s = gen_reg_rtx (src_mode);
      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
      emit_insn (pat);
      src = s;
    }

  convert_move (dst, src, unsignedp);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }
  switch (dst_mode)
    {
    case SImode:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case DImode:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case TImode:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    }
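/* Worked SImode example of the mask computation above: start = 8,
   width = 8 gives maskbits = -1ll << 16 = ...ffff0000; since start is
   nonzero we add 1ll << 24, so the low 32 bits become 0x00ff0000 --
   exactly the 8 bits located 8 bits down from the MSB, matching the
   field being inserted. */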
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx aligned = gen_reg_rtx (SImode);
      rtx low = gen_reg_rtx (SImode);
      rtx addr = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx mem;

      emit_move_insn (addr, XEXP (ops[0], 0));
      emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, aligned);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      emit_move_insn (mem, dst);
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;

	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = adjust_address (mem, TImode, 16);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;

  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (bytes > offset)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
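/* Example of the aligned path above (an illustration, not from the
   original source): a 19-byte copy emits one V16QI move for bytes
   0..15, then builds a selb mask whose first 3 bytes are 0xff so only
   bytes 16..18 of the destination quadword are replaced by source
   bytes; the rest keep their old contents. */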
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
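/* The row index encodes the comparison mode in the order listed above
   (QI, HI, SI, DI, TI, SF, DF, then the vector modes) and the column is
   the spu_comp_code; e.g. the SImode signed greater-than compare is
   spu_comp_icode[2][SPU_GT] == CODE_FOR_cgt_si.  A 0 entry (the
   unsigned float columns) means no such pattern exists. */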
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE; we can generate better code in most cases if we do it
   ourselves. */
void
spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  rtx target = operands[0];
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  int index;
  /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
  if (GET_CODE (spu_compare_op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
	switch (code)
	  {
	  case GE:
	    spu_compare_op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    spu_compare_op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    spu_compare_op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    spu_compare_op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }
  op_mode = GET_MODE (spu_compare_op0);
      if (HONOR_NANS (op_mode))
      if (HONOR_NANS (op_mode))
      comp_mode = V4SImode;
      comp_mode = V2DImode;
  if (GET_MODE (spu_compare_op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();
  if (is_set == 0 && spu_compare_op1 == const0_rtx
      && (GET_MODE (spu_compare_op0) == SImode
	  || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = spu_compare_op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);
      if (reverse_compare)
	{
	  rtx t = spu_compare_op1;
	  spu_compare_op1 = spu_compare_op0;
	  spu_compare_op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (spu_compare_op0, op_mode))
	spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (spu_compare_op1, op_mode))
	spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 spu_compare_op0,
							 spu_compare_op1);
      emit_insn (comp_rtx);

	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							     spu_compare_op0,
							     spu_compare_op1);
	  emit_insn (eq_rtx);
	  ior_code = ior_optab->handlers[(int) comp_mode].insn_code;
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));
    }
  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
static HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  return val;
}
static rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  REAL_VALUE_TYPE rv;
  long tv[2];

  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }

  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
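/* Example (an illustration, not from the original source):
   hwint_to_const_double (SFmode, 0x3f800000) treats the value as an
   IEEE single bit pattern and yields a CONST_DOUBLE representing 1.0f;
   the DFmode branch reassembles the two 32-bit halves of V the same
   way.  It is the inverse of const_double_to_hwint above. */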
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      break;

    default:
      output_addr_const (file, addr);
      break;
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;

  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }

  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	      fprintf (file, "h");
	      fprintf (file, "b");
	}

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	      val = trunc_int_for_mode (val, HImode);
	      val = trunc_int_for_mode (val, QImode);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
		fprintf (file, "a");
		fprintf (file, "h");
		fprintf (file, "hu");
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	      fprintf (file, "b");
	      fprintf (file, "h");
	      fprintf (file, "w");
	      fprintf (file, "d");
	    if (xcode == CONST_VECTOR)
	      {
		x = CONST_VECTOR_ELT (x, 0);
		xcode = GET_CODE (x);
	      }
	    if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
	      fprintf (file, "a");
	    else if (xcode == HIGH)
	      fprintf (file, "hu");
      }

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
		val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	    constant_to_array (mode, x, arr);
	    for (i = 0; i < 16; i++)
	    print_operand (file, GEN_INT (val), 0);
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
	    if (GET_CODE (x) == CONST_VECTOR)
	      x = CONST_VECTOR_ELT (x, 0);
	    output_addr_const (file, x);
	      fprintf (file, "@h");
      }
      if (xcode == CONST_INT)
	/* Only the 4 least significant bits are relevant for the
	   generate-control-word instructions. */
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	    fprintf (file, "b");
	    fprintf (file, "h");
	    fprintf (file, "w");
	    fprintf (file, "d");

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls. */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));

      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));

      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
extern char call_used_regs[];

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register. */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  return pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction. */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, temp, hi, lo;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	   IOR might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arrhi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
	else
	  to = ops[0];
	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (imode, arrhi);
	lo = array_to_constant (imode, arrlo);
	emit_move_insn (temp, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	 * AND might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	      crtl->uses_pic_offset_table = 1;
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    default:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || crtl->uses_pic_offset_table)
      && (!saving
	  || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
    return 1;
  return 0;
}
/* This function is only correct starting with local register
   allocation. */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}
/* This happens after reload, so we need to expand it. */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue. */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0)
	  && current_function_is_leaf)
	return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:

   AP -> +-------------+
 prev SP | back chain  |
         |  reg save   | crtl->args.pretend_args_size bytes
         | saved regs  | spu_saved_regs_size() bytes
   FP -> +-------------+
         |    vars     | get_frame_size() bytes
  HFP -> +-------------+
         |    args     | crtl->outgoing_args_size bytes
   SP -> +-------------+
*/
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (flag_pic && optimize == 0)
    crtl->uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;
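  /* Example sizing (an illustration, not from the original source): a
     non-leaf function with 32 bytes of locals and one 16-byte register
     save slot gets total_size = 32 + 16 + 0 + 0 plus
     STACK_POINTER_OFFSET for the back chain / $lr area (32 bytes on
     this target), i.e. 80 bytes. */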
  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (saved_regs_size > 0)
    {
      offset = -crtl->args.pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 16;
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
  if (flag_pic && crtl->uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }
  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1) */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
	}
      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain. */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first. */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	}
      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + crtl->outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
	}
    }

  emit_note (NOTE_INSN_DELETED);
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (cfun->calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -crtl->args.pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 16;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);
    }

  emit_note (NOTE_INSN_DELETED);
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
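/* Example (an illustration, not from the original source):
   spu_const (V8HImode, 7) returns (const_vector:V8HI [7 7 ... 7]) with
   all eight elements equal to 7, while spu_const (SFmode, 0x3f800000)
   goes through hwint_to_const_double and yields 1.0f. */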
/* Create a MODE vector constant from 4 ints. */
rtx
spu_const_from_ints (enum machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant (mode, arr);
}
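/* The four ints are laid out big-endian, one per 32-bit slot; e.g.
   spu_const_from_ints (V4SImode, 1, 2, 3, 4) produces the quadword
   00000001 00000002 00000003 00000004. */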
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  int bb_index;			/* the original block. */
};
static struct spu_bb_info *spu_bb_info;
#define STOP_HINT_P(INSN) \
  (GET_CODE(INSN) == CALL_INSN \
   || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
   || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily. */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
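/* Both markers piggyback on existing RTL flag bits (unchanging and
   in_struct, respectively), so they cost no extra storage, but they
   must only be accessed through these macros, which restrict them to
   the rtx codes checked above. */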
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
   unrecognized insns. */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop. */
static void
pad_bb (void)
{
  rtx insn, next_insn, prev_insn, hbr_insn = 0;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES. */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops. */
  length = 0;

  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
	  || INSN_CODE (insn) == CODE_FOR_hbr)
	{
	  if (hbr_insn)
	    {
	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
		  || (a1 - a0 == 4))
		{
		  prev_insn = emit_insn_before (gen_lnop (), insn);
		  PUT_MODE (prev_insn, GET_MODE (insn));
		  PUT_MODE (insn, TImode);
		}
	    }
	  hbr_insn = insn;
	}
      if (INSN_CODE (insn) == CODE_FOR_blockage)
	{
	  if (GET_MODE (insn) == TImode)
	    PUT_MODE (next_insn, TImode);
	  insn = next_insn;
	  next_insn = next_active_insn (insn);
	}
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
	{
	  if (((addr + length) & 7) != 0)
	    {
	      emit_nop_for_insn (prev_insn);
	      length += 4;
	    }
	}
      else if (GET_MODE (insn) == TImode
	       && ((next_insn && GET_MODE (next_insn) != TImode)
		   || get_attr_type (insn) == TYPE_MULTI0)
	       && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned. */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}
/* Routines for branch hints. */

static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target,
		      int distance, sbitmap blocks)
{
  rtx branch_label = 0;
  rtx hint;
  rtx insn;
  rtx table;

  if (before == 0 || branch == 0 || target == 0)
    return;

  /* While scheduling we require hints to be no further than 600, so
     we need to enforce that here too */
  if (distance > 600)
    return;

  /* If we have a Basic block note, emit it after the basic block note. */
  if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
    before = NEXT_INSN (before);

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
  SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);

  hint = emit_insn_before (gen_hbr (branch_label, target), before);
  recog_memoized (hint);
  HINTED_P (branch) = 1;

  if (GET_CODE (target) == LABEL_REF)
    HINTED_P (XEXP (target, 0)) = 1;
  else if (tablejump_p (branch, 0, &table))
    {
      rtvec vec;
      int j;
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
	vec = XVEC (PATTERN (table), 0);
      else
	vec = XVEC (PATTERN (table), 1);
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
    }

  if (distance >= 588)
    {
      /* Make sure the hint isn't scheduled any earlier than this point,
         which could make it too far for the branch offset to fit */
      recog_memoized (emit_insn_before (gen_blockage (), hint));
    }
  else if (distance <= 8 * 4)
    {
      /* To guarantee at least 8 insns between the hint and branch we
         insert nops. */
      int d;
      for (d = distance; d < 8 * 4; d += 4)
	{
	  insn =
	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
	  recog_memoized (insn);
	}

      /* Make sure any nops inserted aren't scheduled before the hint. */
      recog_memoized (emit_insn_after (gen_blockage (), hint));

      /* Make sure any nops inserted aren't scheduled after the call. */
      if (CALL_P (branch) && distance < 8 * 4)
	recog_memoized (emit_insn_before (gen_blockage (), branch));
    }
}
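/* Worked example of the nop padding above: for a branch only 20 bytes
   (5 insns) past its hint, the loop runs for d = 20, 24 and 28,
   emitting 3 nops, so at least 8 * 4 bytes end up separating the hbr
   from the branch it hints. */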
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target. */
static rtx
get_branch_target (rtx branch)
{
  if (GET_CODE (branch) == JUMP_INSN)
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
	return 0;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
	         try a branch hint. */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (GET_CODE (branch) == CALL_INSN)
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL. */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it. */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to
     an even address and return

   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST.
 */
2419 insert_hbrp_for_ilb_runout (rtx first)
2421 rtx insn, before_4 = 0, before_16 = 0;
2422 int addr = 0, length, first_addr = -1;
2423 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2424 int insert_lnop_after = 0;
2425 for (insn = first; insn; insn = NEXT_INSN (insn))
2428 if (first_addr == -1)
2429 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2430 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2431 length = get_attr_length (insn);
2433 if (before_4 == 0 && addr + length >= 4 * 4)
2435 /* We test for 14 instructions because the first hbrp will add
2436 up to 2 instructions. */
2437 if (before_16 == 0 && addr + length >= 14 * 4)
2440 if (INSN_CODE (insn) == CODE_FOR_hbr)
2442 /* Make sure an hbrp is at least 2 cycles away from a hint.
2443 Insert an lnop after the hbrp when necessary. */
2444 if (before_4 == 0 && addr > 0)
2447 insert_lnop_after |= 1;
2449 else if (before_4 && addr <= 4 * 4)
2450 insert_lnop_after |= 1;
2451 if (before_16 == 0 && addr > 10 * 4)
2454 insert_lnop_after |= 2;
2456 else if (before_16 && addr <= 14 * 4)
2457 insert_lnop_after |= 2;
2460 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2462 if (addr < hbrp_addr0)
2464 else if (addr < hbrp_addr1)
2468 if (CALL_P (insn) || JUMP_P (insn))
2470 if (HINTED_P (insn))
2473 /* Any branch after the first 15 insns should be on an even
2474 address to avoid a special case branch. There might be
2475 some nops and/or hbrps inserted, so we test after 10
2478 SCHED_ON_EVEN_P (insn) = 1;
2481 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2482 return;
2485 if (addr + length >= 32 * 4)
2487 gcc_assert (before_4 && before_16);
2488 if (hbrp_addr0 > 4 * 4)
2490 insn =
2491 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2492 recog_memoized (insn);
2493 INSN_ADDRESSES_NEW (insn,
2494 INSN_ADDRESSES (INSN_UID (before_4)));
2495 PUT_MODE (insn, GET_MODE (before_4));
2496 PUT_MODE (before_4, TImode);
2497 if (insert_lnop_after & 1)
2499 insn = emit_insn_before (gen_lnop (), before_4);
2500 recog_memoized (insn);
2501 INSN_ADDRESSES_NEW (insn,
2502 INSN_ADDRESSES (INSN_UID (before_4)));
2503 PUT_MODE (insn, TImode);
2506 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2507 && hbrp_addr1 > 16 * 4)
2509 insn =
2510 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2511 recog_memoized (insn);
2512 INSN_ADDRESSES_NEW (insn,
2513 INSN_ADDRESSES (INSN_UID (before_16)));
2514 PUT_MODE (insn, GET_MODE (before_16));
2515 PUT_MODE (before_16, TImode);
2516 if (insert_lnop_after & 2)
2518 insn = emit_insn_before (gen_lnop (), before_16);
2519 recog_memoized (insn);
2520 INSN_ADDRESSES_NEW (insn,
2521 INSN_ADDRESSES (INSN_UID
2523 PUT_MODE (insn, TImode);
2529 else if (BARRIER_P (insn))
2530 return;
2534 /* The SPU might hang when it executes 48 inline instructions after a
2535 hinted branch jumps to its hinted target. The beginning of a
2536 function and the return from a call might have been hinted, and must
2537 be handled as well. To prevent a hang we insert 2 hbrps. The first
2538 should be within 6 insns of the branch target. The second should be
2539 within 22 insns of the branch target. When determining if hbrps are
2540 necessary, we look for only 32 inline instructions, because up to
2541 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2542 new hbrps, we insert them within 4 and 16 insns of the target. */
2547 if (TARGET_SAFE_HINTS)
2549 shorten_branches (get_insns ());
2550 /* Insert hbrp at beginning of function */
2551 insn = next_active_insn (get_insns ());
2552 if (insn)
2553 insert_hbrp_for_ilb_runout (insn);
2554 /* Insert hbrp after hinted targets. */
2555 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2556 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2557 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2561 static int in_spu_reorg;
2563 /* Insert branch hints. There are no branch optimizations after this
2564 pass, so it's safe to set our branch hints now. */
2566 spu_machine_dependent_reorg (void)
2571 rtx branch_target = 0;
2572 int branch_addr = 0, insn_addr, required_dist = 0;
2576 if (!TARGET_BRANCH_HINTS || optimize == 0)
2578 /* We still do it for unoptimized code because an external
2579 function might have hinted a call or return. */
2585 blocks = sbitmap_alloc (last_basic_block);
2586 sbitmap_zero (blocks);
2589 compute_bb_for_insn ();
2594 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2595 sizeof (struct spu_bb_info));
2597 /* We need exact insn addresses and lengths. */
2598 shorten_branches (get_insns ());
2600 for (i = n_basic_blocks - 1; i >= 0; i--)
2602 bb = BASIC_BLOCK (i);
2604 if (spu_bb_info[i].prop_jump)
2606 branch = spu_bb_info[i].prop_jump;
2607 branch_target = get_branch_target (branch);
2608 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2609 required_dist = spu_hint_dist;
2611 /* Search from end of a block to beginning. In this loop, find
2612 jumps which need a branch and emit them only when:
2613 - it's an indirect branch and we're at the insn which sets
2614 the register
2615 - we're at an insn that will invalidate the hint. e.g., a
2616 call, another hint insn, inline asm that clobbers $hbr, and
2617 some inlined operations (divmodsi4). Don't consider jumps
2618 because they are only at the end of a block and are
2619 considered when we are deciding whether to propagate
2620 - we're getting too far away from the branch. The hbr insns
2621 only have a signed 10 bit offset
2622 We go back as far as possible so the branch will be considered
2623 for propagation when we get to the beginning of the block. */
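/* Hypothetical example of the cases above: for an indirect branch
   "bi $3" at address 600, an insn at address 560 that sets $3 would
   invalidate any hint placed before it, so the hint is emitted just
   after that setter; and once branch_addr - insn_addr exceeds 600 we
   stop anyway so the hbr's limited signed offset can still reach.  */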
2624 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2628 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2629 if (branch
2630 && ((GET_CODE (branch_target) == REG
2631 && set_of (branch_target, insn) != NULL_RTX)
2632 || insn_clobbers_hbr (insn)
2633 || branch_addr - insn_addr > 600))
2635 rtx next = NEXT_INSN (insn);
2636 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2637 if (insn != BB_END (bb)
2638 && branch_addr - next_addr >= required_dist)
2642 "hint for %i in block %i before %i\n",
2643 INSN_UID (branch), bb->index,
2645 spu_emit_branch_hint (next, branch, branch_target,
2646 branch_addr - next_addr, blocks);
2651 /* JUMP_P will only be true at the end of a block. When
2652 branch is already set it means we've previously decided
2653 to propagate a hint for that branch into this block. */
2654 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2657 if ((branch_target = get_branch_target (insn)))
2660 branch_addr = insn_addr;
2661 required_dist = spu_hint_dist;
2665 if (insn == BB_HEAD (bb))
2666 break;
2671 /* If we haven't emitted a hint for this branch yet, it might
2672 be profitable to emit it in one of the predecessor blocks,
2673 especially for loops. */
2675 basic_block prev = 0, prop = 0, prev2 = 0;
2676 int loop_exit = 0, simple_loop = 0;
2677 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2679 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2680 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2681 prev = EDGE_PRED (bb, j)->src;
2682 else
2683 prev2 = EDGE_PRED (bb, j)->src;
2685 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2686 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2687 loop_exit = 1;
2688 else if (EDGE_SUCC (bb, j)->dest == bb)
2689 simple_loop = 1;
2691 /* If this branch is a loop exit then propagate to previous
2692 fallthru block. This catches the cases when it is a simple
2693 loop or when there is an initial branch into the loop. */
2694 if (prev && (loop_exit || simple_loop)
2695 && prev->loop_depth <= bb->loop_depth)
2696 prop = prev;
2698 /* If there is only one adjacent predecessor, don't propagate
2699 outside this loop. This loop_depth test isn't perfect, but
2700 I'm not sure the loop_father member is valid at this point. */
2701 else if (prev && single_pred_p (bb)
2702 && prev->loop_depth == bb->loop_depth)
2703 prop = prev;
2705 /* If this is the JOIN block of a simple IF-THEN then
2706 propagate the hint to the HEADER block. */
2707 else if (prev && prev2
2708 && EDGE_COUNT (bb->preds) == 2
2709 && EDGE_COUNT (prev->preds) == 1
2710 && EDGE_PRED (prev, 0)->src == prev2
2711 && prev2->loop_depth == bb->loop_depth
2712 && GET_CODE (branch_target) != REG)
2715 /* Don't propagate when:
2716 - this is a simple loop and the hint would be too far
2717 - this is not a simple loop and there are 16 insns in
2718 the loop already
2719 - the predecessor block ends in a branch that will be
2720 hinted
2721 - the predecessor block ends in an insn that invalidates
2722 the hint */
2724 if (prop
2725 && (bbend = BB_END (prop))
2726 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2727 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2728 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2731 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2732 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2733 bb->index, prop->index, bb->loop_depth,
2734 INSN_UID (branch), loop_exit, simple_loop,
2735 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2737 spu_bb_info[prop->index].prop_jump = branch;
2738 spu_bb_info[prop->index].bb_index = i;
2740 else if (branch_addr - next_addr >= required_dist)
2743 fprintf (dump_file, "hint for %i in block %i before %i\n",
2744 INSN_UID (branch), bb->index,
2745 INSN_UID (NEXT_INSN (insn)));
2746 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2747 branch_addr - next_addr, blocks);
2754 if (!sbitmap_empty_p (blocks))
2755 find_many_sub_basic_blocks (blocks);
2757 /* We have to schedule to make sure alignment is ok. */
2758 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2760 /* The hints need to be scheduled, so call it again. */
2767 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2768 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2770 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2771 between its branch label and the branch. We don't move the
2772 label because GCC expects it at the beginning of the block. */
2773 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2774 rtx label_ref = XVECEXP (unspec, 0, 0);
2775 rtx label = XEXP (label_ref, 0);
2778 for (branch = NEXT_INSN (label);
2779 !JUMP_P (branch) && !CALL_P (branch);
2780 branch = NEXT_INSN (branch))
2781 if (NONJUMP_INSN_P (branch))
2782 offset += get_attr_length (branch);
2784 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2787 if (spu_flag_var_tracking)
2790 timevar_push (TV_VAR_TRACKING);
2791 variable_tracking_main ();
2792 timevar_pop (TV_VAR_TRACKING);
2793 df_finish_pass (false);
2796 free_bb_for_insn ();
2802 /* Insn scheduling routines, primarily for dual issue. */
2804 spu_sched_issue_rate (void)
2810 uses_ls_unit (rtx insn)
2812 rtx set = single_set (insn);
2813 if (set != 0
2814 && (GET_CODE (SET_DEST (set)) == MEM
2815 || GET_CODE (SET_SRC (set)) == MEM))
2824 /* Handle inline asm */
2825 if (INSN_CODE (insn) == -1)
2827 t = get_attr_type (insn);
2852 case TYPE_IPREFETCH:
2860 /* haifa-sched.c has a static variable that keeps track of the current
2861 cycle. It is passed to spu_sched_reorder, and we record it here for
2862 use by spu_sched_variable_issue. It won't be accurate if the
2863 scheduler updates its clock_var between the two calls. */
2864 static int clock_var;
2866 /* This is used to keep track of insn alignment. Set to 0 at the
2867 beginning of each block and increased by the "length" attr of each
2868 insn scheduled. */
2869 static int spu_sched_length;
2871 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2872 ready list appropriately in spu_sched_reorder(). */
2873 static int pipe0_clock;
2874 static int pipe1_clock;
2876 static int prev_clock_var;
2878 static int prev_priority;
2880 /* The SPU needs to load the next ilb sometime during the execution of
2881 the previous ilb. There is a potential conflict if every cycle has a
2882 load or store. To avoid the conflict we make sure the load/store
2883 unit is free for at least one cycle during the execution of insns in
2884 the previous ilb. */
2885 static int spu_ls_first;
2886 static int prev_ls_clock;
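/* Illustrative scenario, not from the original comments: if every
   insn since spu_ls_first has used the load/store unit and we reach
   spu_sched_length - spu_ls_first >= 4 * 15 (15 insns worth of
   code), the reorder hook below emits an iprefetch so the ilb fetch
   can find a free load/store cycle.  */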
2889 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2890 int max_ready ATTRIBUTE_UNUSED)
2892 spu_sched_length = 0;
2896 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2897 int max_ready ATTRIBUTE_UNUSED)
2899 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2901 /* When any block might be at least 8-byte aligned, assume they
2902 will all be at least 8-byte aligned to make sure dual issue
2903 works out correctly. */
2904 spu_sched_length = 0;
2906 spu_ls_first = INT_MAX;
2911 prev_clock_var = -1;
2916 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2917 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2921 if (GET_CODE (PATTERN (insn)) == USE
2922 || GET_CODE (PATTERN (insn)) == CLOBBER
2923 || (len = get_attr_length (insn)) == 0)
2924 return more;
2926 spu_sched_length += len;
2928 /* Reset on inline asm */
2929 if (INSN_CODE (insn) == -1)
2931 spu_ls_first = INT_MAX;
2936 p = get_pipe (insn);
2938 pipe0_clock = clock_var;
2940 pipe1_clock = clock_var;
2944 if (clock_var - prev_ls_clock > 1
2945 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2946 spu_ls_first = INT_MAX;
2947 if (uses_ls_unit (insn))
2949 if (spu_ls_first == INT_MAX)
2950 spu_ls_first = spu_sched_length;
2951 prev_ls_clock = clock_var;
2954 /* The scheduler hasn't inserted the nop, but we will later on.
2955 Include those nops in spu_sched_length. */
2956 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2957 spu_sched_length += 4;
2958 prev_clock_var = clock_var;
2960 /* more is -1 when called from spu_sched_reorder for new insns
2961 that don't have INSN_PRIORITY */
2962 if (more >= 0)
2963 prev_priority = INSN_PRIORITY (insn);
2966 /* Always try issuing more insns. spu_sched_reorder will decide
2967 when the cycle should be advanced. */
2971 /* This function is called for both TARGET_SCHED_REORDER and
2972 TARGET_SCHED_REORDER2. */
2974 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2975 rtx *ready, int *nreadyp, int clock)
2977 int i, nready = *nreadyp;
2978 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2980 clock_var = clock;
2983 if (nready <= 0 || pipe1_clock >= clock)
2984 return 0;
2986 /* Find any rtl insns that don't generate assembly insns and schedule
2987 them first. */
2988 for (i = nready - 1; i >= 0; i--)
2991 if (INSN_CODE (insn) == -1
2992 || INSN_CODE (insn) == CODE_FOR_blockage
2993 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2995 ready[i] = ready[nready - 1];
2996 ready[nready - 1] = insn;
3001 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3002 for (i = 0; i < nready; i++)
3003 if (INSN_CODE (ready[i]) != -1)
3006 switch (get_attr_type (insn))
3031 case TYPE_IPREFETCH:
3037 /* In the first scheduling phase, schedule loads and stores together
3038 to increase the chance they will get merged during postreload CSE. */
3039 if (!reload_completed && pipe_ls >= 0)
3041 insn = ready[pipe_ls];
3042 ready[pipe_ls] = ready[nready - 1];
3043 ready[nready - 1] = insn;
3047 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3051 /* When we have loads/stores in every cycle of the last 15 insns and
3052 we are about to schedule another load/store, emit an hbrp insn
3053 instead. */
3054 if (in_spu_reorg
3055 && spu_sched_length - spu_ls_first >= 4 * 15
3056 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3058 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3059 recog_memoized (insn);
3060 if (pipe0_clock < clock)
3061 PUT_MODE (insn, TImode);
3062 spu_sched_variable_issue (file, verbose, insn, -1);
3066 /* In general, we want to emit nops to increase dual issue, but dual
3067 issue isn't faster when one of the insns could be scheduled later
3068 without affecting the critical path. We look at INSN_PRIORITY to
3069 make a good guess, but it isn't perfect so -mdual-nops=n can be
3070 used to affect it. */
3071 if (in_spu_reorg && spu_dual_nops < 10)
3073 /* When we are at an even address and we are not issuing nops to
3074 improve scheduling then we need to advance the cycle. */
3075 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3076 && (spu_dual_nops == 0
3079 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3082 /* When at an odd address, schedule the highest priority insn
3083 without considering pipeline. */
3084 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3085 && (spu_dual_nops == 0
3087 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3092 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3093 pipe0 insn in the ready list, schedule it. */
3094 if (pipe0_clock < clock && pipe_0 >= 0)
3095 schedule_i = pipe_0;
3097 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3098 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3099 else if (pipe_1 >= 0)
3100 schedule_i = pipe_1;
3102 if (schedule_i > -1)
3104 insn = ready[schedule_i];
3105 ready[schedule_i] = ready[nready - 1];
3106 ready[nready - 1] = insn;
3112 /* INSN is dependent on DEP_INSN. */
3114 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3118 /* The blockage pattern is used to prevent instructions from being
3119 moved across it and has no cost. */
3120 if (INSN_CODE (insn) == CODE_FOR_blockage
3121 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3124 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3125 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3128 /* Make sure hbrps are spread out. */
3129 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3130 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3133 /* Make sure hints and hbrps are 2 cycles apart. */
3134 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3135 || INSN_CODE (insn) == CODE_FOR_hbr)
3136 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3137 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3140 /* An hbrp has no real dependency on other insns. */
3141 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3142 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3145 /* Assuming that it is unlikely an argument register will be used in
3146 the first cycle of the called function, we reduce the cost for
3147 slightly better scheduling of dep_insn. When not hinted, the
3148 mispredicted branch would hide the cost as well. */
3151 rtx target = get_branch_target (insn);
3152 if (GET_CODE (target) != REG || !set_of (target, insn))
3157 /* And when returning from a function, let's assume the return values
3158 are completed sooner too. */
3159 if (CALL_P (dep_insn))
3162 /* Make sure an instruction that loads from the back chain is scheduled
3163 away from the return instruction so a hint is more likely to get
3164 issued. */
3165 if (INSN_CODE (insn) == CODE_FOR__return
3166 && (set = single_set (dep_insn))
3167 && GET_CODE (SET_DEST (set)) == REG
3168 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3171 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3172 scheduler makes every insn in a block anti-dependent on the final
3173 jump_insn. We adjust here so higher cost insns will get scheduled
3174 earlier. */
3175 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3176 return insn_cost (dep_insn) - 3;
3181 /* Create a CONST_DOUBLE from a string. */
3183 spu_float_const (const char *string, enum machine_mode mode)
3185 REAL_VALUE_TYPE value;
3186 value = REAL_VALUE_ATOF (string, mode);
3187 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3191 spu_constant_address_p (rtx x)
3193 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3194 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3195 || GET_CODE (x) == HIGH);
3198 static enum spu_immediate
3199 which_immediate_load (HOST_WIDE_INT val)
3201 gcc_assert (val == trunc_int_for_mode (val, SImode));
3203 if (val >= -0x8000 && val <= 0x7fff)
3204 return SPU_IL;
3205 if (val >= 0 && val <= 0x3ffff)
3206 return SPU_ILA;
3207 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3208 return SPU_ILH;
3209 if ((val & 0xffff) == 0)
3210 return SPU_ILHU;
3211 return SPU_NONE;
3215 /* Return true when OP can be loaded by one of the il instructions, or
3216 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
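/* Example (hypothetical register $6): 0x12345678 matches none of the
   single-insn forms above (the two halfwords differ and it does not
   fit 18 bits), so before flow2 it is classified IC_IL2 and built as
       ilhu $6,0x1234
       iohl $6,0x5678
   where ilhu sets the upper halfwords and iohl ORs in the lower.  */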
3218 immediate_load_p (rtx op, enum machine_mode mode)
3220 if (CONSTANT_P (op))
3222 enum immediate_class c = classify_immediate (op, mode);
3223 return c == IC_IL1 || c == IC_IL1s
3224 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3229 /* Return true if the first SIZE bytes of arr are a constant that can be
3230 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3231 represent the size and offset of the instruction to use. */
3233 cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
3235 int cpat, run, i, start;
3239 for (i = 0; i < size && cpat; i++)
3247 else if (arr[i] == 2 && arr[i+1] == 3)
3249 else if (arr[i] == 0)
3251 while (arr[i+run] == run && i+run < 16)
3253 if (run != 4 && run != 8)
3258 if ((i & (run-1)) != 0)
3265 if (cpat && (run || size < 16))
3272 *pstart = start == -1 ? 16-run : start;
3278 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3279 it into a register. MODE is only valid when OP is a CONST_INT. */
3280 static enum immediate_class
3281 classify_immediate (rtx op, enum machine_mode mode)
3284 unsigned char arr[16];
3285 int i, j, repeated, fsmbi, repeat;
3287 gcc_assert (CONSTANT_P (op));
3289 if (GET_MODE (op) != VOIDmode)
3290 mode = GET_MODE (op);
3292 /* A V4SI const_vector with all identical symbols is ok. */
3295 && GET_CODE (op) == CONST_VECTOR
3296 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3297 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3298 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3299 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3300 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3301 op = CONST_VECTOR_ELT (op, 0);
3303 switch (GET_CODE (op))
3307 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3310 /* We can never know if the resulting address fits in 18 bits and can be
3311 loaded with ila. For now, assume the address will not overflow if
3312 the displacement is "small" (fits 'K' constraint). */
3313 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3315 rtx sym = XEXP (XEXP (op, 0), 0);
3316 rtx cst = XEXP (XEXP (op, 0), 1);
3318 if (GET_CODE (sym) == SYMBOL_REF
3319 && GET_CODE (cst) == CONST_INT
3320 && satisfies_constraint_K (cst))
3329 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3330 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3331 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3332 return IC_POOL;
3337 constant_to_array (mode, op, arr);
3339 /* Check that each 4-byte slot is identical. */
3341 for (i = 4; i < 16; i += 4)
3342 for (j = 0; j < 4; j++)
3343 if (arr[j] != arr[i + j])
3348 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3349 val = trunc_int_for_mode (val, SImode);
3351 if (which_immediate_load (val) != SPU_NONE)
3355 /* Any mode of 2 bytes or smaller can be loaded with an il
3356 instruction. */
3357 gcc_assert (GET_MODE_SIZE (mode) > 2);
3361 for (i = 0; i < 16 && fsmbi; i++)
3362 if (arr[i] != 0 && repeat == 0)
3363 repeat = arr[i];
3364 else if (arr[i] != 0 && arr[i] != repeat)
3365 fsmbi = 0;
3366 if (fsmbi)
3367 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3369 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3382 static enum spu_immediate
3383 which_logical_immediate (HOST_WIDE_INT val)
3385 gcc_assert (val == trunc_int_for_mode (val, SImode));
3387 if (val >= -0x200 && val <= 0x1ff)
3389 if (val >= 0 && val <= 0xffff)
3391 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3393 val = trunc_int_for_mode (val, HImode);
3394 if (val >= -0x200 && val <= 0x1ff)
3396 if ((val & 0xff) == ((val >> 8) & 0xff))
3398 val = trunc_int_for_mode (val, QImode);
3399 if (val >= -0x200 && val <= 0x1ff)
3406 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3407 CONST_DOUBLEs. */
3409 const_vector_immediate_p (rtx x)
3412 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3413 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3414 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3415 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3421 logical_immediate_p (rtx op, enum machine_mode mode)
3424 unsigned char arr[16];
3427 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3428 || GET_CODE (op) == CONST_VECTOR);
3430 if (GET_CODE (op) == CONST_VECTOR
3431 && !const_vector_immediate_p (op))
3434 if (GET_MODE (op) != VOIDmode)
3435 mode = GET_MODE (op);
3437 constant_to_array (mode, op, arr);
3439 /* Check that bytes are repeated. */
3440 for (i = 4; i < 16; i += 4)
3441 for (j = 0; j < 4; j++)
3442 if (arr[j] != arr[i + j])
3445 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3446 val = trunc_int_for_mode (val, SImode);
3448 i = which_logical_immediate (val);
3449 return i != SPU_NONE && i != SPU_IOHL;
3453 iohl_immediate_p (rtx op, enum machine_mode mode)
3456 unsigned char arr[16];
3459 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3460 || GET_CODE (op) == CONST_VECTOR);
3462 if (GET_CODE (op) == CONST_VECTOR
3463 && !const_vector_immediate_p (op))
3466 if (GET_MODE (op) != VOIDmode)
3467 mode = GET_MODE (op);
3469 constant_to_array (mode, op, arr);
3471 /* Check that bytes are repeated. */
3472 for (i = 4; i < 16; i += 4)
3473 for (j = 0; j < 4; j++)
3474 if (arr[j] != arr[i + j])
3477 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3478 val = trunc_int_for_mode (val, SImode);
3480 return val >= 0 && val <= 0xffff;
3484 arith_immediate_p (rtx op, enum machine_mode mode,
3485 HOST_WIDE_INT low, HOST_WIDE_INT high)
3488 unsigned char arr[16];
3491 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3492 || GET_CODE (op) == CONST_VECTOR);
3494 if (GET_CODE (op) == CONST_VECTOR
3495 && !const_vector_immediate_p (op))
3498 if (GET_MODE (op) != VOIDmode)
3499 mode = GET_MODE (op);
3501 constant_to_array (mode, op, arr);
3503 if (VECTOR_MODE_P (mode))
3504 mode = GET_MODE_INNER (mode);
3506 bytes = GET_MODE_SIZE (mode);
3507 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3509 /* Check that bytes are repeated. */
3510 for (i = bytes; i < 16; i += bytes)
3511 for (j = 0; j < bytes; j++)
3512 if (arr[j] != arr[i + j])
3515 val = arr[0];
3516 for (j = 1; j < bytes; j++)
3517 val = (val << 8) | arr[j];
3519 val = trunc_int_for_mode (val, mode);
3521 return val >= low && val <= high;
3524 /* TRUE when op is an immediate and an exact power of 2, and given that
3525 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3526 all entries must be the same. */
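/* Example: SFmode 8.0 has the bit pattern 0x41000000; the biased
   exponent is 0x41000000 >> 23 = 130, so exp = 130 - 127 = 3, the
   mantissa bits are zero, and any range with low <= 3 <= high
   accepts it as 2^3.  */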
3528 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3530 enum machine_mode int_mode;
3532 unsigned char arr[16];
3535 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3536 || GET_CODE (op) == CONST_VECTOR);
3538 if (GET_CODE (op) == CONST_VECTOR
3539 && !const_vector_immediate_p (op))
3542 if (GET_MODE (op) != VOIDmode)
3543 mode = GET_MODE (op);
3545 constant_to_array (mode, op, arr);
3547 if (VECTOR_MODE_P (mode))
3548 mode = GET_MODE_INNER (mode);
3550 bytes = GET_MODE_SIZE (mode);
3551 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3553 /* Check that bytes are repeated. */
3554 for (i = bytes; i < 16; i += bytes)
3555 for (j = 0; j < bytes; j++)
3556 if (arr[j] != arr[i + j])
3559 val = arr[0];
3560 for (j = 1; j < bytes; j++)
3561 val = (val << 8) | arr[j];
3563 val = trunc_int_for_mode (val, int_mode);
3565 /* Currently, we only handle SFmode */
3566 gcc_assert (mode == SFmode);
3569 int exp = (val >> 23) - 127;
3570 return val > 0 && (val & 0x007fffff) == 0
3571 && exp >= low && exp <= high;
3576 /* We accept:
3577 - any 32-bit constant (SImode, SFmode)
3578 - any constant that can be generated with fsmbi (any mode)
3579 - a 64-bit constant where the high and low bits are identical
3580 (DImode, DFmode)
3581 - a 128-bit constant where the four 32-bit words match. */
3583 spu_legitimate_constant_p (rtx x)
3585 if (GET_CODE (x) == HIGH)
3587 /* V4SI with all identical symbols is valid. */
3588 if (GET_CODE (x) == CONST_VECTOR
3589 && GET_MODE (x) == V4SImode
3590 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3591 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3592 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3593 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3594 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3595 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3597 if (GET_CODE (x) == CONST_VECTOR
3598 && !const_vector_immediate_p (x))
3603 /* Valid addresses are:
3604 - symbol_ref, label_ref, const
3605 - reg
3606 - reg + const, where either reg or const is 16 byte aligned
3607 - reg + reg, alignment doesn't matter
3608 The alignment matters in the reg+const case because lqd and stqd
3609 ignore the 4 least significant bits of the const. (TODO: It might be
3610 preferable to allow any alignment and fix it up when splitting.) */
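/* Worked example of the comment above: with a 16-byte aligned $3,
   "$3 + 12" is accepted because lqd/stqd drop the low 4 bits of the
   offset and the splitter loads the quadword at $3 and rotates; with
   both the register and the constant unaligned, the dropped bits
   could not be recovered, so that form is rejected.  */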
3612 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3613 rtx x, int reg_ok_strict)
3615 if (mode == TImode && GET_CODE (x) == AND
3616 && GET_CODE (XEXP (x, 1)) == CONST_INT
3617 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3619 switch (GET_CODE (x))
3623 return !TARGET_LARGE_MEM;
3626 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3628 rtx sym = XEXP (XEXP (x, 0), 0);
3629 rtx cst = XEXP (XEXP (x, 0), 1);
3631 /* Accept any symbol_ref + constant, assuming it does not
3632 wrap around the local store addressability limit. */
3633 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3639 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3643 gcc_assert (GET_CODE (x) == REG);
3646 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3651 rtx op0 = XEXP (x, 0);
3652 rtx op1 = XEXP (x, 1);
3653 if (GET_CODE (op0) == SUBREG)
3654 op0 = XEXP (op0, 0);
3655 if (GET_CODE (op1) == SUBREG)
3656 op1 = XEXP (op1, 0);
3657 /* We can't just accept any aligned register because CSE can
3658 change it to a register that is not marked aligned and then
3659 recog will fail. So we only accept frame registers because
3660 they will only be changed to other frame registers. */
3661 if (GET_CODE (op0) == REG
3662 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3663 && GET_CODE (op1) == CONST_INT
3664 && INTVAL (op1) >= -0x2000
3665 && INTVAL (op1) <= 0x1fff
3666 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
3668 if (GET_CODE (op0) == REG
3669 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3670 && GET_CODE (op1) == REG
3671 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3682 /* When the address is reg + const_int, force the const_int into a
3683 register. */
3685 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3686 enum machine_mode mode)
3689 /* Make sure both operands are registers. */
3690 if (GET_CODE (x) == PLUS)
3694 if (ALIGNED_SYMBOL_REF_P (op0))
3696 op0 = force_reg (Pmode, op0);
3697 mark_reg_pointer (op0, 128);
3699 else if (GET_CODE (op0) != REG)
3700 op0 = force_reg (Pmode, op0);
3701 if (ALIGNED_SYMBOL_REF_P (op1))
3703 op1 = force_reg (Pmode, op1);
3704 mark_reg_pointer (op1, 128);
3706 else if (GET_CODE (op1) != REG)
3707 op1 = force_reg (Pmode, op1);
3708 x = gen_rtx_PLUS (Pmode, op0, op1);
3709 if (spu_legitimate_address (mode, x, 0))
3715 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3716 struct attribute_spec.handler. */
3718 spu_handle_fndecl_attribute (tree * node,
3720 tree args ATTRIBUTE_UNUSED,
3721 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3723 if (TREE_CODE (*node) != FUNCTION_DECL)
3725 warning (0, "`%s' attribute only applies to functions",
3726 IDENTIFIER_POINTER (name));
3727 *no_add_attrs = true;
3733 /* Handle the "vector" attribute. */
3735 spu_handle_vector_attribute (tree * node, tree name,
3736 tree args ATTRIBUTE_UNUSED,
3737 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3739 tree type = *node, result = NULL_TREE;
3740 enum machine_mode mode;
3743 while (POINTER_TYPE_P (type)
3744 || TREE_CODE (type) == FUNCTION_TYPE
3745 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3746 type = TREE_TYPE (type);
3748 mode = TYPE_MODE (type);
3750 unsigned_p = TYPE_UNSIGNED (type);
3754 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3757 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3760 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3763 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3766 result = V4SF_type_node;
3769 result = V2DF_type_node;
3775 /* Propagate qualifiers attached to the element type
3776 onto the vector type. */
3777 if (result && result != type && TYPE_QUALS (type))
3778 result = build_qualified_type (result, TYPE_QUALS (type));
3780 *no_add_attrs = true; /* No need to hang on to the attribute. */
3783 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3785 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3790 /* Return nonzero if FUNC is a naked function. */
3792 spu_naked_function_p (tree func)
3796 if (TREE_CODE (func) != FUNCTION_DECL)
3799 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3800 return a != NULL_TREE;
3804 spu_initial_elimination_offset (int from, int to)
3806 int saved_regs_size = spu_saved_regs_size ();
3808 if (!current_function_is_leaf || crtl->outgoing_args_size
3809 || get_frame_size () || saved_regs_size)
3810 sp_offset = STACK_POINTER_OFFSET;
3811 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3812 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3813 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3814 return get_frame_size ();
3815 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3816 return sp_offset + crtl->outgoing_args_size
3817 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3818 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3819 return get_frame_size () + saved_regs_size + sp_offset;
3825 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3827 enum machine_mode mode = TYPE_MODE (type);
3828 int byte_size = ((mode == BLKmode)
3829 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3831 /* Make sure small structs are left justified in a register. */
3832 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3833 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3835 enum machine_mode smode;
3838 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3839 int n = byte_size / UNITS_PER_WORD;
3840 v = rtvec_alloc (nregs);
3841 for (i = 0; i < n; i++)
3843 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3844 gen_rtx_REG (TImode,
3847 GEN_INT (UNITS_PER_WORD * i));
3848 byte_size -= UNITS_PER_WORD;
3856 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3858 gen_rtx_EXPR_LIST (VOIDmode,
3859 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3860 GEN_INT (UNITS_PER_WORD * n));
3862 return gen_rtx_PARALLEL (mode, v);
3864 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3868 spu_function_arg (CUMULATIVE_ARGS cum,
3869 enum machine_mode mode,
3870 tree type, int named ATTRIBUTE_UNUSED)
3874 if (cum >= MAX_REGISTER_ARGS)
3877 byte_size = ((mode == BLKmode)
3878 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3880 /* The ABI does not allow parameters to be passed partially in
3881 reg and partially in stack. */
3882 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3885 /* Make sure small structs are left justified in a register. */
3886 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3887 && byte_size < UNITS_PER_WORD && byte_size > 0)
3889 enum machine_mode smode;
3893 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3894 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3895 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3897 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3900 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3903 /* Variable sized types are passed by reference. */
3905 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3906 enum machine_mode mode ATTRIBUTE_UNUSED,
3907 const_tree type, bool named ATTRIBUTE_UNUSED)
3909 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3915 /* Create and return the va_list datatype.
3917 On SPU, va_list is an array type equivalent to
3919 typedef struct __va_list_tag
3921 void *__args __attribute__((__aligned(16)));
3922 void *__skip __attribute__((__aligned(16)));
3926 where __args points to the arg that will be returned by the next
3927 va_arg(), and __skip points to the previous stack frame such that
3928 when __args == __skip we should advance __args by 32 bytes. */
3930 spu_build_builtin_va_list (void)
3932 tree f_args, f_skip, record, type_decl;
3935 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3937 type_decl =
3938 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3940 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3941 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3943 DECL_FIELD_CONTEXT (f_args) = record;
3944 DECL_ALIGN (f_args) = 128;
3945 DECL_USER_ALIGN (f_args) = 1;
3947 DECL_FIELD_CONTEXT (f_skip) = record;
3948 DECL_ALIGN (f_skip) = 128;
3949 DECL_USER_ALIGN (f_skip) = 1;
3951 TREE_CHAIN (record) = type_decl;
3952 TYPE_NAME (record) = type_decl;
3953 TYPE_FIELDS (record) = f_args;
3954 TREE_CHAIN (f_args) = f_skip;
3956 /* We know this is being padded and we want it to be; it is an internal
3957 type, so hide the warnings from the user. */
3959 warn_padded = false;
3961 layout_type (record);
3965 /* The correct type is an array type of one element. */
3966 return build_array_type (record, build_index_type (size_zero_node));
3969 /* Implement va_start by filling the va_list structure VALIST.
3970 NEXTARG points to the first anonymous stack argument.
3972 The following global variables are used to initialize
3973 the va_list structure:
3976 the CUMULATIVE_ARGS for this function
3978 crtl->args.arg_offset_rtx:
3979 holds the offset of the first anonymous stack argument
3980 (relative to the virtual arg pointer). */
3983 spu_va_start (tree valist, rtx nextarg)
3985 tree f_args, f_skip;
3988 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3989 f_skip = TREE_CHAIN (f_args);
3991 valist = build_va_arg_indirect_ref (valist);
3992 args =
3993 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3994 skip =
3995 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3997 /* Find the __args area. */
3998 t = make_tree (TREE_TYPE (args), nextarg);
3999 if (crtl->args.pretend_args_size > 0)
4000 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4001 size_int (-STACK_POINTER_OFFSET));
4002 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4003 TREE_SIDE_EFFECTS (t) = 1;
4004 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4006 /* Find the __skip area. */
4007 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4008 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4009 size_int (crtl->args.pretend_args_size
4010 - STACK_POINTER_OFFSET));
4011 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4012 TREE_SIDE_EFFECTS (t) = 1;
4013 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4016 /* Gimplify va_arg by updating the va_list structure
4017 VALIST as required to retrieve an argument of type
4018 TYPE, and returning that argument.
4020 ret = va_arg(VALIST, TYPE);
4022 generates code equivalent to:
4024 paddedsize = (sizeof(TYPE) + 15) & -16;
4025 if (VALIST.__args + paddedsize > VALIST.__skip
4026 && VALIST.__args <= VALIST.__skip)
4027 addr = VALIST.__skip + 32;
4029 addr = VALIST.__args;
4030 VALIST.__args = addr + paddedsize;
4031 ret = *(TYPE *)addr;
4034 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4035 gimple_seq * post_p ATTRIBUTE_UNUSED)
4037 tree f_args, f_skip;
4039 HOST_WIDE_INT size, rsize;
4040 tree paddedsize, addr, tmp;
4041 bool pass_by_reference_p;
4043 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4044 f_skip = TREE_CHAIN (f_args);
4046 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4047 args =
4048 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4049 skip =
4050 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4052 addr = create_tmp_var (ptr_type_node, "va_arg");
4053 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4055 /* If an object is dynamically sized, a pointer to it is passed
4056 instead of the object itself. */
4057 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4059 if (pass_by_reference_p)
4060 type = build_pointer_type (type);
4061 size = int_size_in_bytes (type);
4062 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4064 /* build conditional expression to calculate addr. The expression
4065 will be gimplified later. */
4066 paddedsize = size_int (rsize);
4067 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4068 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4069 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4070 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4071 unshare_expr (skip)));
4073 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4074 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4075 size_int (32)), unshare_expr (args));
4077 gimplify_assign (addr, tmp, pre_p);
4079 /* update VALIST.__args */
4080 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4081 gimplify_assign (unshare_expr (args), tmp, pre_p);
4083 addr = fold_convert (build_pointer_type (type), addr);
4085 if (pass_by_reference_p)
4086 addr = build_va_arg_indirect_ref (addr);
4088 return build_va_arg_indirect_ref (addr);
4091 /* Save parameter registers starting with the register that corresponds
4092 to the first unnamed parameter. If the first unnamed parameter is
4093 in the stack then save no registers. Set pretend_args_size to the
4094 amount of space needed to save the registers. */
4096 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4097 tree type, int *pretend_size, int no_rtl)
4106 /* cum currently points to the last named argument; we want to
4107 start at the next argument. */
4108 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4110 offset = -STACK_POINTER_OFFSET;
4111 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4113 tmp = gen_frame_mem (V4SImode,
4114 plus_constant (virtual_incoming_args_rtx,
4116 emit_move_insn (tmp,
4117 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4120 *pretend_size = offset + STACK_POINTER_OFFSET;
4125 spu_conditional_register_usage (void)
4129 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4130 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4134 /* This is called to decide when we can simplify a load instruction. We
4135 must only return true for registers which we know will always be
4136 aligned, taking into account that CSE might replace this reg with
4137 another one that has not been marked aligned.
4138 So this is really only true for frame, stack and virtual registers,
4139 which we know are always aligned and should not be adversely affected
4140 by CSE. */
4142 regno_aligned_for_load (int regno)
4144 return regno == FRAME_POINTER_REGNUM
4145 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
4146 || regno == ARG_POINTER_REGNUM
4147 || regno == STACK_POINTER_REGNUM
4148 || (regno >= FIRST_VIRTUAL_REGISTER
4149 && regno <= LAST_VIRTUAL_REGISTER);
4152 /* Return TRUE when mem is known to be 16-byte aligned. */
4154 aligned_mem_p (rtx mem)
4156 if (MEM_ALIGN (mem) >= 128)
4158 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4160 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4162 rtx p0 = XEXP (XEXP (mem, 0), 0);
4163 rtx p1 = XEXP (XEXP (mem, 0), 1);
4164 if (regno_aligned_for_load (REGNO (p0)))
4166 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4168 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4172 else if (GET_CODE (XEXP (mem, 0)) == REG)
4174 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4177 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4179 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4181 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4182 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4183 if (GET_CODE (p0) == SYMBOL_REF
4184 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4190 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4191 into its SYMBOL_REF_FLAGS. */
4193 spu_encode_section_info (tree decl, rtx rtl, int first)
4195 default_encode_section_info (decl, rtl, first);
4197 /* If a variable has a forced alignment to < 16 bytes, mark it with
4198 SYMBOL_FLAG_ALIGN1. */
4199 if (TREE_CODE (decl) == VAR_DECL
4200 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4201 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4204 /* Return TRUE if we are certain the mem refers to a complete object
4205 which is both 16-byte aligned and padded to a 16-byte boundary. This
4206 would make it safe to store with a single instruction.
4207 We guarantee the alignment and padding for static objects by aligning
4208 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4209 FIXME: We currently cannot guarantee this for objects on the stack
4210 because assign_parm_setup_stack calls assign_stack_local with the
4211 alignment of the parameter mode and in that case the alignment never
4212 gets adjusted by LOCAL_ALIGNMENT. */
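/* Example: for "static int x; x = v;" the symbol x is 16-byte aligned
   and padded out to 16 bytes by DATA_ALIGNMENT, so writing the whole
   quadword with one stqd cannot clobber a neighboring object; a 4-byte
   stack slot packed against other locals gets the load/shufb/store
   sequence instead.  */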
4214 store_with_one_insn_p (rtx mem)
4216 rtx addr = XEXP (mem, 0);
4217 if (GET_MODE (mem) == BLKmode)
4219 /* Only static objects. */
4220 if (GET_CODE (addr) == SYMBOL_REF)
4222 /* We use the associated declaration to make sure the access is
4223 referring to the whole object.
4224 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4225 if it is necessary. Will there be cases where one exists, and
4226 the other does not? Will there be cases where both exist, but
4227 have different types? */
4228 tree decl = MEM_EXPR (mem);
4230 && TREE_CODE (decl) == VAR_DECL
4231 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4233 decl = SYMBOL_REF_DECL (addr);
4235 && TREE_CODE (decl) == VAR_DECL
4236 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4243 spu_expand_mov (rtx * ops, enum machine_mode mode)
4245 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4248 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4250 rtx from = SUBREG_REG (ops[1]);
4251 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4253 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4254 && GET_MODE_CLASS (imode) == MODE_INT
4255 && subreg_lowpart_p (ops[1]));
4257 if (GET_MODE_SIZE (imode) < 4)
4258 imode = SImode;
4259 if (imode != GET_MODE (from))
4260 from = gen_rtx_SUBREG (imode, from, 0);
4262 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4264 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4265 emit_insn (GEN_FCN (icode) (ops[0], from));
4268 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4272 /* At least one of the operands needs to be a register. */
4273 if ((reload_in_progress | reload_completed) == 0
4274 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4276 rtx temp = force_reg (mode, ops[1]);
4277 emit_move_insn (ops[0], temp);
4280 if (reload_in_progress || reload_completed)
4282 if (CONSTANT_P (ops[1]))
4283 return spu_split_immediate (ops);
4288 if (GET_CODE (ops[0]) == MEM)
4290 if (!spu_valid_move (ops))
4292 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4293 gen_reg_rtx (TImode)));
4297 else if (GET_CODE (ops[1]) == MEM)
4299 if (!spu_valid_move (ops))
4302 (ops[0], ops[1], gen_reg_rtx (TImode),
4303 gen_reg_rtx (SImode)));
4307 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4308 extend them. */
4309 if (GET_CODE (ops[1]) == CONST_INT)
4311 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4312 if (val != INTVAL (ops[1]))
4314 emit_move_insn (ops[0], GEN_INT (val));
4323 spu_split_load (rtx * ops)
4325 enum machine_mode mode = GET_MODE (ops[0]);
4326 rtx addr, load, rot, mem, p0, p1;
4329 addr = XEXP (ops[1], 0);
4333 if (GET_CODE (addr) == PLUS)
4336 aligned reg + aligned reg => lqx
4337 aligned reg + unaligned reg => lqx, rotqby
4338 aligned reg + aligned const => lqd
4339 aligned reg + unaligned const => lqd, rotqbyi
4340 unaligned reg + aligned reg => lqx, rotqby
4341 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4342 unaligned reg + aligned const => lqd, rotqby
4343 unaligned reg + unaligned const -> not allowed by legitimate address
4345 p0 = XEXP (addr, 0);
4346 p1 = XEXP (addr, 1);
4347 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
4349 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4351 emit_insn (gen_addsi3 (ops[3], p0, p1));
4359 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4361 rot_amt = INTVAL (p1) & 15;
4362 p1 = GEN_INT (INTVAL (p1) & -16);
4363 addr = gen_rtx_PLUS (SImode, p0, p1);
4365 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4369 else if (GET_CODE (addr) == REG)
4371 if (!regno_aligned_for_load (REGNO (addr)))
4374 else if (GET_CODE (addr) == CONST)
4376 if (GET_CODE (XEXP (addr, 0)) == PLUS
4377 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4378 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4380 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4382 addr = gen_rtx_CONST (Pmode,
4383 gen_rtx_PLUS (Pmode,
4384 XEXP (XEXP (addr, 0), 0),
4385 GEN_INT (rot_amt & -16)));
4387 addr = XEXP (XEXP (addr, 0), 0);
4392 else if (GET_CODE (addr) == CONST_INT)
4394 rot_amt = INTVAL (addr);
4395 addr = GEN_INT (rot_amt & -16);
4397 else if (!ALIGNED_SYMBOL_REF_P (addr))
4400 if (GET_MODE_SIZE (mode) < 4)
4401 rot_amt += GET_MODE_SIZE (mode) - 4;
4407 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4414 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4415 mem = change_address (ops[1], TImode, addr);
4417 emit_insn (gen_movti (load, mem));
4420 emit_insn (gen_rotqby_ti (load, load, rot));
4422 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4424 if (reload_completed)
4425 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4427 emit_insn (gen_spu_convert (ops[0], load));
4431 spu_split_store (rtx * ops)
4433 enum machine_mode mode = GET_MODE (ops[0]);
4436 rtx addr, p0, p1, p1_lo, smem;
4440 addr = XEXP (ops[0], 0);
4442 if (GET_CODE (addr) == PLUS)
4445 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4446 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4447 aligned reg + aligned const => lqd, c?d, shuf, stqx
4448 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4449 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4450 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4451 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4452 unaligned reg + unaligned const -> not allowed by legitimate address
4455 p0 = XEXP (addr, 0);
4456 p1 = p1_lo = XEXP (addr, 1);
4457 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4459 p1_lo = GEN_INT (INTVAL (p1) & 15);
4460 p1 = GEN_INT (INTVAL (p1) & -16);
4461 addr = gen_rtx_PLUS (SImode, p0, p1);
4464 else if (GET_CODE (addr) == REG)
4468 p1 = p1_lo = const0_rtx;
4473 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4474 p1 = 0; /* aform doesn't use p1 */
4476 if (ALIGNED_SYMBOL_REF_P (addr))
4478 else if (GET_CODE (addr) == CONST)
4480 if (GET_CODE (XEXP (addr, 0)) == PLUS
4481 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4482 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4484 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4486 addr = gen_rtx_CONST (Pmode,
4487 gen_rtx_PLUS (Pmode,
4488 XEXP (XEXP (addr, 0), 0),
4489 GEN_INT (v & -16)));
4491 addr = XEXP (XEXP (addr, 0), 0);
4492 p1_lo = GEN_INT (v & 15);
4495 else if (GET_CODE (addr) == CONST_INT)
4497 p1_lo = GEN_INT (INTVAL (addr) & 15);
4498 addr = GEN_INT (INTVAL (addr) & -16);
4502 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4504 scalar = store_with_one_insn_p (ops[0]);
4507 /* We could copy the flags from the ops[0] MEM to lmem here, but we
4508 don't because we want this load to be optimized away if possible,
4509 and copying the flags will prevent that in certain cases, e.g.
4510 consider the volatile flag. */
4512 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4513 set_mem_alias_set (lmem, 0);
4514 emit_insn (gen_movti (reg, lmem));
4516 if (!p0 || regno_aligned_for_load (REGNO (p0)))
4517 p0 = stack_pointer_rtx;
4521 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4522 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4524 else if (reload_completed)
4526 if (GET_CODE (ops[1]) == REG)
4527 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4528 else if (GET_CODE (ops[1]) == SUBREG)
4529 emit_move_insn (reg,
4530 gen_rtx_REG (GET_MODE (reg),
4531 REGNO (SUBREG_REG (ops[1]))));
4537 if (GET_CODE (ops[1]) == REG)
4538 emit_insn (gen_spu_convert (reg, ops[1]));
4539 else if (GET_CODE (ops[1]) == SUBREG)
4540 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4545 if (GET_MODE_SIZE (mode) < 4 && scalar)
4546 emit_insn (gen_shlqby_ti
4547 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4549 smem = change_address (ops[0], TImode, addr);
4550 /* We can't use the previous alias set because the memory has changed
4551 size and can potentially overlap objects of other types. */
4552 set_mem_alias_set (smem, 0);
4554 emit_insn (gen_movti (smem, reg));
4557 /* Return TRUE if X is MEM which is a struct member reference
4558 and the member can safely be loaded and stored with a single
4559 instruction because it is padded. */
4561 mem_is_padded_component_ref (rtx x)
4563 tree t = MEM_EXPR (x);
4565 if (!t || TREE_CODE (t) != COMPONENT_REF)
4567 t = TREE_OPERAND (t, 1);
4568 if (!t || TREE_CODE (t) != FIELD_DECL
4569 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4571 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4572 r = DECL_FIELD_CONTEXT (t);
4573 if (!r || TREE_CODE (r) != RECORD_TYPE)
4575 /* Make sure they are the same mode */
4576 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4578 /* If there are no following fields then the field alignment assures
4579 the structure is padded to the alignment which means this field is
4580 padded too. */
4581 if (TREE_CHAIN (t) == 0)
4583 /* If the following field is also aligned then this field will be
4584 padded. */
4585 t = TREE_CHAIN (t);
4586 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4591 /* Parse the -mfixed-range= option string. */
4593 fix_range (const char *const_str)
4596 char *str, *dash, *comma;
4598 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4599 REG2 are either register names or register numbers. The effect
4600 of this option is to mark the registers in the range from REG1 to
4601 REG2 as ``fixed'' so they won't be used by the compiler. */
4603 i = strlen (const_str);
4604 str = (char *) alloca (i + 1);
4605 memcpy (str, const_str, i + 1);
4609 dash = strchr (str, '-');
4612 warning (0, "value of -mfixed-range must have form REG1-REG2");
4616 comma = strchr (dash + 1, ',');
4620 first = decode_reg_name (str);
4623 warning (0, "unknown register name: %s", str);
4627 last = decode_reg_name (dash + 1);
4630 warning (0, "unknown register name: %s", dash + 1);
4638 warning (0, "%s-%s is an empty range", str, dash + 1);
4642 for (i = first; i <= last; ++i)
4643 fixed_regs[i] = call_used_regs[i] = 1;
4654 spu_valid_move (rtx * ops)
4656 enum machine_mode mode = GET_MODE (ops[0]);
4657 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4660 /* init_expr_once tries to recog against load and store insns to set
4661 the direct_load[] and direct_store[] arrays. We always want to
4662 consider those loads and stores valid. init_expr_once is called in
4663 the context of a dummy function which does not have a decl. */
4664 if (cfun->decl == 0)
4667 /* Don't allow loads/stores which would require more than 1 insn.
4668 During and after reload we assume loads and stores only take 1
4669 insn. */
4670 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4672 if (GET_CODE (ops[0]) == MEM
4673 && (GET_MODE_SIZE (mode) < 4
4674 || !(store_with_one_insn_p (ops[0])
4675 || mem_is_padded_component_ref (ops[0]))))
4677 if (GET_CODE (ops[1]) == MEM
4678 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4684 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4685 can be generated using the fsmbi instruction. */
4687 fsmbi_const_p (rtx x)
4691 /* We can always choose TImode for CONST_INT because the high bits
4692 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4693 enum immediate_class c = classify_immediate (x, TImode);
4694 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
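/* Example: fsmbi expands each bit of its 16-bit immediate to a 0x00
   or 0xff result byte (most significant bit first), so a constant
   whose 16 bytes are ff ff ff ff 00 00 00 00 ff ff ff ff 00 00 00 00
   is simply "fsmbi $r,0xf0f0"; bytes of a single other repeated value
   are handled as IC_FSMBI2, which needs one extra insn.  */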
4699 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4700 can be generated using the cbd, chd, cwd or cdd instruction. */
4702 cpat_const_p (rtx x, enum machine_mode mode)
4706 enum immediate_class c = classify_immediate (x, mode);
4707 return c == IC_CPAT;
4713 gen_cpat_const (rtx * ops)
4715 unsigned char dst[16];
4716 int i, offset, shift, isize;
4717 if (GET_CODE (ops[3]) != CONST_INT
4718 || GET_CODE (ops[2]) != CONST_INT
4719 || (GET_CODE (ops[1]) != CONST_INT
4720 && GET_CODE (ops[1]) != REG))
4722 if (GET_CODE (ops[1]) == REG
4723 && (!REG_POINTER (ops[1])
4724 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4727 for (i = 0; i < 16; i++)
4729 isize = INTVAL (ops[3]);
4732 else if (isize == 2)
4736 offset = (INTVAL (ops[2]) +
4737 (GET_CODE (ops[1]) ==
4738 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4739 for (i = 0; i < isize; i++)
4740 dst[offset + i] = i + shift;
4741 return array_to_constant (TImode, dst);
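/* Worked example, assuming shufb selects bytes 0x00-0x0f from its
   first operand and 0x10-0x1f from its second: a 4-byte insert at
   offset 4 yields the pattern
       10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f
   so bytes 4-7 of the result come from the new value's preferred
   slot and every other byte keeps the original quadword.  */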
4744 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4745 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4746 than 16 bytes, the value is repeated across the rest of the array. */
4748 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4753 memset (arr, 0, 16);
4754 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4755 if (GET_CODE (x) == CONST_INT
4756 || (GET_CODE (x) == CONST_DOUBLE
4757 && (mode == SFmode || mode == DFmode)))
4759 gcc_assert (mode != VOIDmode && mode != BLKmode);
4761 if (GET_CODE (x) == CONST_DOUBLE)
4762 val = const_double_to_hwint (x);
4765 first = GET_MODE_SIZE (mode) - 1;
4766 for (i = first; i >= 0; i--)
4768 arr[i] = val & 0xff;
4771 /* Splat the constant across the whole array. */
4772 for (j = 0, i = first + 1; i < 16; i++)
4775 j = (j == first) ? 0 : j + 1;
4778 else if (GET_CODE (x) == CONST_DOUBLE)
4780 val = CONST_DOUBLE_LOW (x);
4781 for (i = 15; i >= 8; i--)
4783 arr[i] = val & 0xff;
4786 val = CONST_DOUBLE_HIGH (x);
4787 for (i = 7; i >= 0; i--)
4789 arr[i] = val & 0xff;
4793 else if (GET_CODE (x) == CONST_VECTOR)
4797 mode = GET_MODE_INNER (mode);
4798 units = CONST_VECTOR_NUNITS (x);
4799 for (i = 0; i < units; i++)
4801 elt = CONST_VECTOR_ELT (x, i);
4802 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4804 if (GET_CODE (elt) == CONST_DOUBLE)
4805 val = const_double_to_hwint (elt);
4808 first = GET_MODE_SIZE (mode) - 1;
4809 if (first + i * GET_MODE_SIZE (mode) > 16)
4811 for (j = first; j >= 0; j--)
4813 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
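/* Example (added): constant_to_array (SImode, GEN_INT (0x12345678), arr)
   fills arr[] with { 0x12, 0x34, 0x56, 0x78 } repeated four times; the
   value is stored big-endian and then splatted across the quadword.  */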
/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
   smaller than 16 bytes, use the bytes that would represent that value
   in a register, e.g., for QImode return the value of arr[3]. */
rtx
array_to_constant (enum machine_mode mode, unsigned char arr[16])
{
  enum machine_mode inner_mode;
  rtvec v;
  int units, size, i, j, k;
  HOST_WIDE_INT val;

  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      j = GET_MODE_SIZE (mode);
      i = j < 4 ? 4 - j : 0;
      for (val = 0; i < j; i++)
        val = (val << 8) | arr[i];
      val = trunc_int_for_mode (val, mode);
      return GEN_INT (val);
    }

  if (mode == TImode)
    {
      HOST_WIDE_INT high;
      for (i = high = 0; i < 8; i++)
        high = (high << 8) | arr[i];
      for (i = 8, val = 0; i < 16; i++)
        val = (val << 8) | arr[i];
      return immed_double_const (val, high, TImode);
    }
  if (mode == SFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val = trunc_int_for_mode (val, SImode);
      return hwint_to_const_double (SFmode, val);
    }
  if (mode == DFmode)
    {
      for (i = 0, val = 0; i < 8; i++)
        val = (val << 8) | arr[i];
      return hwint_to_const_double (DFmode, val);
    }

  if (!VECTOR_MODE_P (mode))
    abort ();

  units = GET_MODE_NUNITS (mode);
  size = GET_MODE_UNIT_SIZE (mode);
  inner_mode = GET_MODE_INNER (mode);
  v = rtvec_alloc (units);

  for (k = i = 0; i < units; ++i)
    {
      val = 0;
      for (j = 0; j < size; j++, k++)
        val = (val << 8) | arr[k];

      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
        RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
      else
        RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
    }

  return gen_rtx_CONST_VECTOR (mode, v);
}
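/* Example (added): for SImode the scalar branch starts at i == 0 and
   assembles arr[0..3] big-endian, so { 0x12, 0x34, 0x56, 0x78, ... }
   becomes (const_int 0x12345678), the inverse of what
   constant_to_array produces for that mode.  */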
static void
reloc_diagnostic (rtx x)
{
  tree loc_decl, decl = 0;
  const char *msg;
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
    return;

  if (GET_CODE (x) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (x);
  else if (GET_CODE (x) == CONST
           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));

  /* SYMBOL_REF_DECL is not necessarily a DECL. */
  if (decl && !DECL_P (decl))
    decl = 0;

  /* We use last_assemble_variable_decl to get line information.  It's
     not always going to be right and might not even be close, but will
     be right for the more common cases. */
  if (!last_assemble_variable_decl || in_section == ctors_section)
    loc_decl = decl;
  else
    loc_decl = last_assemble_variable_decl;

  /* The decl could be a string constant. */
  if (decl && DECL_P (decl))
    msg = "%Jcreating run-time relocation for %qD";
  else
    msg = "creating run-time relocation";

  if (TARGET_WARN_RELOC)
    warning (0, msg, loc_decl, decl);
  else
    error (msg, loc_decl, decl);
}
/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them. */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't. */
  if (GET_CODE (x) == SYMBOL_REF
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);
}

static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
               bool speed ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = GET_MODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return (LEGITIMATE_CONSTANT_P (x)) ? cost : COSTS_N_INSNS (6);
   */

  /* Use defaults for float operations.  Not accurate but good enough. */
  if (mode == SFmode)
    {
      *total = COSTS_N_INSNS (13);
      return true;
    }
  if (mode == DFmode)
    {
      *total = COSTS_N_INSNS (6);
      return true;
    }
  switch (code)
    {
    case CONST_INT:
      if (satisfies_constraint_K (x))
        *total = 0;
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (3);
      return true;

    case CONST:
      *total = COSTS_N_INSNS (3);
      return true;

    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (0);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (5);
      return true;

    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
      *total = COSTS_N_INSNS (7);
      return true;

    case PLUS:
      if (mode == TImode)
        {
          *total = COSTS_N_INSNS (9);
          return true;
        }
      break;

    case MULT:
      cost =
        GET_CODE (XEXP (x, 0)) ==
        REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
              cost = COSTS_N_INSNS (14);
              if ((val & 0xffff) == 0)
                cost = COSTS_N_INSNS (9);
              else if (val > 0 && val < 0x10000)
                cost = COSTS_N_INSNS (11);
            }
        }
      *total = cost;
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (4);
      return true;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_CONVERT)
        *total = COSTS_N_INSNS (0);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    default:
      break;
    }

  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));

  *total = cost;
  return true;
}
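/* Example (added): a DImode PLUS falls through the switch with the
   default cost of COSTS_N_INSNS (2) and is then scaled by
   (8/4) * (8/4), giving COSTS_N_INSNS (8); multi-word integer
   operations expand to several real insns.  */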
static enum machine_mode
spu_unwind_word_mode (void)
{
  return SImode;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call. */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}
/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.) */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* copy the back chain so we can save it back again. */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
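/* Note (added): the 0x00010203 pattern splatted into every word copies
   op1 (the allocation size) into all four slots, so the single
   subv4si3 decrements both the stack pointer in slot 0 and the
   Available Stack Size in slot 1 in one instruction.  */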
void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second. */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}
static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want. */
rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
          && ((type) == 0
              || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
              || int_size_in_bytes (type) >
              (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}
/* Create the built-in types and functions */

enum spu_function_code
{
#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
#include "spu-builtins.def"
#undef DEF_BUILTIN
  NUM_SPU_BUILTINS
};

extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];

struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params, NULL_TREE},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};
static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  unsigned int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
                        (void_type_node,
                         TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared. */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];            /* build_function will make a copy. */
      int parm;

      if (d->name == 0)
        continue;

      /* Find last parm. */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
        ;

      p = void_list_node;
      while (parm > 1)
        p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      d->fndecl =
        add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
                              NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
        TREE_READONLY (d->fndecl) = 1;

      /* These builtins don't throw. */
      TREE_NOTHROW (d->fndecl) = 1;
    }
}
void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };

  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp. */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp. */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}
int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
}
void
spu_builtin_splats (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
          && GET_CODE (ops[1]) != SUBREG)
        ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
        {
        case V2DImode:
        case V2DFmode:
          shuf =
            immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
                                TImode);
          break;
        case V4SImode:
        case V4SFmode:
          shuf =
            immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
                                TImode);
          break;
        case V8HImode:
          shuf =
            immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
                                TImode);
          break;
        case V16QImode:
          shuf =
            immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
                                TImode);
          break;
        default:
          abort ();
        }
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}
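/* Illustrative note (added): shufb control bytes 0x00-0x0f select bytes
   of the first source and 0x10-0x1f bytes of the second; both sources
   are ops[1] here, so each pattern above simply broadcasts the element
   sitting in the preferred slot, e.g. 0x0203 repeated eight times
   copies halfword bytes 2-3 into every halfword for V8HImode.  */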
void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
          break;
        case V4SFmode:
          emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
          break;
        case V4SImode:
          emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
          break;
        case V2DImode:
          emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
          break;
        case V2DFmode:
          emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
          break;
        default:
          abort ();
        }
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }

  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}
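/* Sketch (added): for a variable index the element is first rotated
   into the preferred slot, e.g. for V4SImode the rotqby byte count is
   idx << 2 (idx * 4 bytes), after which spu_convert extracts the
   scalar from slot 0.  */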
void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
                 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
             (mask, stack_pointer_rtx, offset,
              GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}
void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
        pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
          emit_insn (gen_addsi3 (offset, offset, offset));
          break;
        case V4SFmode:
        case V4SImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
          emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
          break;
        case V2DImode:
        case V2DFmode:
          emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
          break;
        default:
          abort ();
        }
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}
void
spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      unsigned char shufa[16] = {
        2, 3, 0, 1, 18, 19, 16, 17,
        0, 1, 2, 3, 16, 17, 18, 19
      };
      unsigned char insna[16] = {
        0x41, 0, 0, 79,
        0x41, 0, 0, STATIC_CHAIN_REGNUM,
        0x60, 0x80, 0, 79,
        0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = memory_address (Pmode, plus_constant (tramp, 16));
      emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      unsigned char insna[16] = {
        0x42, 0, 0, STATIC_CHAIN_REGNUM,
        0x30, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
         fits 18 bits and the last 4 are zeros.  This will be true if
         the stack pointer is initialized to 0x3fff0 at program start,
         otherwise the ila instruction will be garbage. */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
                 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
    }
  emit_insn (gen_sync ());
}
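/* Rough sketch (added) of the code the non-TARGET_LARGE_MEM trampoline
   holds, reading 0x42 as the ila opcode and 0x30 as bra (illustrative,
   under the stack-pointer assumption in the comment above):

        ila     $STATIC_CHAIN_REGNUM, cxt
        bra     fnaddr

   The operand fields are or'ed into the opcode templates of insna[]
   after being shifted into place (cxt << 7, fnaddr << 5).  */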
void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
        arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
        arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
        {
        case HImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_extendhisi2 (sign, ops[1]));
          arr[last] = 0x03;
          arr[last - 1] = 0x02;
          break;
        case SImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
          for (i = 0; i < 4; i++)
            arr[last - i] = 3 - i;
          break;
        case DImode:
          sign = gen_reg_rtx (SImode);
          c = gen_reg_rtx (SImode);
          emit_insn (gen_spu_convert (c, ops[1]));
          emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
          for (i = 0; i < 8; i++)
            arr[last - i] = 7 - i;
          break;
        default:
          abort ();
        }
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
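/* Worked example (added): for SImode -> DImode, last == 7; the pattern
   defaults to 0x10 (byte 0 of the sign word, all zeros or all ones)
   and sets arr[4..7] = {0x00,0x01,0x02,0x03}, placing the source word
   in the low half of the preferred doubleword slot with the sign bytes
   above it.  */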
/* Expand vector initialization.  If there are any constant parts,
   load constant parts first.  Then load any non-constant parts. */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
            || GET_CODE (x) == CONST_DOUBLE
            || GET_CODE (x) == CONST_FIXED))
        ++n_var;
      else
        {
          if (first_constant == NULL_RTX)
            first_constant = x;
        }
      if (i > 0 && !rtx_equal_p (x, first))
        all_same = false;
    }

  /* if all elements are the same, use splats to repeat elements */
  if (all_same)
    {
      if (!CONSTANT_P (first)
          && !register_operand (first, GET_MODE (x)))
        first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* load constant parts */
  if (n_var != n_elts)
    {
      if (n_var == 0)
        {
          emit_move_insn (target,
                          gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
        }
      else
        {
          rtx constant_parts_rtx = copy_rtx (vals);

          gcc_assert (first_constant != NULL_RTX);
          /* fill empty slots with the first constant, this increases
             our chance of using splats in the recursive call below. */
          for (i = 0; i < n_elts; ++i)
            {
              x = XVECEXP (constant_parts_rtx, 0, i);
              if (!(CONST_INT_P (x)
                    || GET_CODE (x) == CONST_DOUBLE
                    || GET_CODE (x) == CONST_FIXED))
                XVECEXP (constant_parts_rtx, 0, i) = first_constant;
            }

          spu_expand_vector_init (target, constant_parts_rtx);
        }
    }

  /* load variable parts */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
        {
          x = XVECEXP (vals, 0, i);
          if (!(CONST_INT_P (x)
                || GET_CODE (x) == CONST_DOUBLE
                || GET_CODE (x) == CONST_FIXED))
            {
              if (!register_operand (x, GET_MODE (x)))
                x = force_reg (GET_MODE (x), x);
              insert_operands[1] = x;
              insert_operands[3] = GEN_INT (i);
              spu_builtin_insert (insert_operands);
            }
        }
    }
}
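/* Usage sketch (added, hypothetical values): initializing V4SImode
   from { x, 1, 2, 3 } with a variable x first loads the constant
   vector { 1, 1, 2, 3 } (the variable slot filled with the first
   constant, which favors the splat path in the recursive call) and
   then inserts x into element 0 via spu_builtin_insert.  */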
/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if valid insn is not available. */
static int
get_vec_cmp_insn (enum rtx_code code,
                  enum machine_mode dest_mode,
                  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is the expected destination mode.  This is a recursive
   function. */
static rtx
spu_emit_vector_compare (enum rtx_code rcode,
                         rtx op0, rtx op1,
                         enum machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  enum machine_mode dest_mode;
  enum machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Single-precision vector compare instructions use destination mode
     V4SImode, double-precision ones use V2DImode; the destination is
     moved to the appropriate mode later. */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
        {
        case LT:
          rcode = GT;
          swap_operands = true;
          try_again = true;
          break;
        case LTU:
          rcode = GTU;
          swap_operands = true;
          try_again = true;
          break;
        case NE:
          /* Treat A != B as ~(A==B). */
          {
            enum insn_code nor_code;
            rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
            nor_code = optab_handler (one_cmpl_optab, (int) dest_mode)->insn_code;
            gcc_assert (nor_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
            if (dmode != dest_mode)
              {
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);
                return temp;
              }
            return mask;
          }
          break;
        case GE:
        case GEU:
        case LE:
        case LEU:
          /* Try GT/GTU/LT/LTU OR EQ */
          {
            rtx c_rtx, eq_rtx;
            enum insn_code ior_code;
            enum rtx_code new_code;

            switch (rcode)
              {
              case GE:  new_code = GT;  break;
              case GEU: new_code = GTU; break;
              case LE:  new_code = LT;  break;
              case LEU: new_code = LTU; break;
              default:
                gcc_unreachable ();
              }

            c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
            eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

            ior_code = optab_handler (ior_optab, (int) dest_mode)->insn_code;
            gcc_assert (ior_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
            if (dmode != dest_mode)
              {
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);
                return temp;
              }
            return mask;
          }
          break;
        default:
          gcc_unreachable ();
        }

      /* You only get two chances. */
      if (try_again)
        vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
        {
          rtx tmp;
          tmp = op0;
          op0 = op1;
          op1 = tmp;
        }
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
      return temp;
    }
  return mask;
}
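/* Example (added): V4SImode GE has no direct compare instruction, so
   the code above emits cgt and ceq and ors the two masks together;
   LT is handled by retrying as GT with the operands swapped.  */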
/* Emit vector conditional expression.
   DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR
   operands.  CC_OP0 and CC_OP1 are the two operands for the relation
   operation COND. */
static rtx
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
                           rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations. */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return dest;
}
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
          || GET_MODE (op) == BLKmode)
        return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
        return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands. */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
        error ("%s expects an integer literal in the range [%d, %d].",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
          && (GET_CODE (XEXP (op, 0)) == PLUS
              || GET_CODE (XEXP (op, 0)) == MINUS))
        {
          v = INTVAL (XEXP (XEXP (op, 0), 1));
          op = XEXP (XEXP (op, 0), 0);
        }
      else if (GET_CODE (op) == CONST_INT)
        v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
               && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
        v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range. */
      if (v < spu_builtin_range[range].low
          || v > spu_builtin_range[range].high)
        error ("%s expects an integer literal in the range [%d, %d]. ("
               HOST_WIDE_INT_PRINT_DEC ")",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high,
               v);

      switch (p)
        {
        case SPU_BTI_S10_4:
          lsbits = 4;
          break;
        case SPU_BTI_U16_2:
          /* This is only used in lqa, and stqa.  Even though the insns
             encode 16 bits of the address (all but the 2 least
             significant), only 14 bits are used because it is masked to
             be 16 byte aligned. */
          lsbits = 4;
          break;
        case SPU_BTI_S16_2:
          /* This is used for lqr and stqr. */
          lsbits = 2;
          break;
        default:
          lsbits = 0;
        }

      if (GET_CODE (op) == LABEL_REF
          || (GET_CODE (op) == SYMBOL_REF
              && SYMBOL_REF_FUNCTION_P (op))
          || (v & ((1 << lsbits) - 1)) != 0)
        warning (0, "%d least significant bits of %s are ignored.", lsbits,
                 d->name);
    }
}
static int
expand_builtin_args (struct spu_builtin_description *d, tree exp,
                     rtx target, rtx ops[])
{
  enum insn_code icode = d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl. */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      gcc_assert (arg != NULL_TREE);
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
    }

  /* The insn pattern may have additional operands (SCRATCH).
     Return the number of actual non-SCRATCH operands. */
  gcc_assert (i <= insn_data[icode].n_operands);
  return i;
}
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
                      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  int n_operands;
  tree return_type;

  /* Set up ops[] with values from arglist. */
  n_operands = expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0. */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {

      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype. */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
        tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         to extra spills. */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
        ops[0] = target;
      else
        target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
        abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
                              gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects. */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands. */
  for (p = 1; i < n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
        mode = insn_data[d->icode].operand[i].mode;
      else
        mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar. */
      if (VECTOR_MODE_P (mode)
          && (GET_CODE (ops[i]) == CONST_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
        {
          if (GET_CODE (ops[i]) == CONST_INT)
            ops[i] = spu_const (mode, INTVAL (ops[i]));
          else
            {
              rtx reg = gen_reg_rtx (mode);
              enum machine_mode imode = GET_MODE_INNER (mode);
              if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
                ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
              if (imode != GET_MODE (ops[i]))
                ops[i] = convert_to_mode (imode, ops[i],
                                          TYPE_UNSIGNED (spu_builtin_types
                                                         [d->parm[i]]));
              emit_insn (gen_spu_splats (reg, ops[i]));
              ops[i] = reg;
            }
        }

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
        ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (insn_data[icode].n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype. */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
rtx
spu_expand_builtin (tree exp,
                    rtx target,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}
/* Implement targetm.vectorize.builtin_mul_widen_even. */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULE_0].fndecl;
      else
        return spu_builtins[SPU_MULE_1].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mul_widen_odd. */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULO_1].fndecl;
      else
        return spu_builtins[SPU_MULO_0].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}
/* Implement targetm.vectorize.builtin_mask_for_load. */
static tree
spu_builtin_mask_for_load (void)
{
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
  gcc_assert (d);
  return d->fndecl;
}
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken, i.e., the vectorized
     version is skipped, this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we
     subtract the latency of a mispredicted branch from the costs that
     are incurred when the vectorized version is executed. */
  if (runtime_test)
    return -19;
  else
    return 0;
}
/* Return true iff a data reference of TYPE can reach vector alignment
   (16) after applying N iterations.  This routine does not determine
   how many iterations are required to reach the desired alignment. */
static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned. */
  return true;
}
/* Implement targetm.vectorize.builtin_vec_perm. */
static tree
spu_builtin_vec_perm (tree type, tree *mask_element_type)
{
  struct spu_builtin_description *d;

  *mask_element_type = unsigned_char_type_node;

  switch (TYPE_MODE (type))
    {
    case V16QImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_0];
      else
        d = &spu_builtins[SPU_SHUFFLE_1];
      break;
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_2];
      else
        d = &spu_builtins[SPU_SHUFFLE_3];
      break;
    case V4SImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_4];
      else
        d = &spu_builtins[SPU_SHUFFLE_5];
      break;
    case V2DImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_6];
      else
        d = &spu_builtins[SPU_SHUFFLE_7];
      break;
    case V4SFmode:
      d = &spu_builtins[SPU_SHUFFLE_8];
      break;
    case V2DFmode:
      d = &spu_builtins[SPU_SHUFFLE_9];
      break;
    default:
      return NULL_TREE;
    }

  gcc_assert (d);
  return d->fndecl;
}
/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
   -2 means the instruction can go in either pipe0 or pipe1. */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
        fprintf (dump_file, "i%d %s %d %d\n",
                 INSN_UID (insn),
                 insn_data[INSN_CODE(insn)].name,
                 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}
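/* Worked example (added): with t[0] == 3 dual-issue candidates,
   t[2] == 4 pipe0 insns and t[3] == 2 pipe1 insns, the result is
   MAX ((3 + 4 + 2 + 1) / 2, MAX (4, 2)) = MAX (5, 4) = 5.  */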
void
spu_init_expanders (void)
{
  /* HARD_FRAME_REGISTER is only 128 bit aligned when
     frame_pointer_needed is true.  We don't know that until we're
     expanding the prologue. */
  if (cfun)
    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
}
static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* For SPU, word mode is TImode, so it is better to use SImode
     for compare returns. */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* For SPU, word mode is TImode, so it is better to use SImode
     for shift counts. */
  return SImode;
}
/* An early place to adjust some flags after GCC has finished processing
   them. */
static void
asm_file_start (void)
{
  /* Variable tracking should be run after all optimizations which
     change order of insns.  It also needs a valid CFG. */
  spu_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  default_file_start ();
}
/* Implement targetm.section_type_flags. */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
  /* .toe needs to have type @nobits. */
  if (strcmp (name, ".toe") == 0)
    return SECTION_BSS;
  return default_section_type_flags (decl, name, reloc);
}
/* Generate a constant or register which contains 2^SCALE.  We assume
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
   SCALE must be SImode. */
rtx
spu_gen_exp2 (enum machine_mode mode, rtx scale)
{
  gcc_assert (mode == V4SFmode);
  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
  if (GET_CODE (scale) != CONST_INT)
    {
      /* unsigned int exp = (127 + scale) << 23;
         __vector float m = (__vector float) spu_splats (exp); */
      rtx reg = force_reg (SImode, scale);
      rtx exp = gen_reg_rtx (SImode);
      rtx mul = gen_reg_rtx (mode);
      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
      emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
      return mul;
    }
  else
    {
      HOST_WIDE_INT exp = 127 + INTVAL (scale);
      unsigned char arr[16];

      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
      arr[2] = arr[6] = arr[10] = arr[14] = 0;
      arr[3] = arr[7] = arr[11] = arr[15] = 0;
      return array_to_constant (mode, arr);
    }
}
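/* Worked example (added): for scale == 1, exp == 128, so each word is
   built from the bytes { 128 >> 1, (unsigned char) (128 << 7), 0, 0 }
   == { 0x40, 0x00, 0x00, 0x00 }, i.e. 0x40000000, the IEEE-754
   single-precision encoding of 2.0f.  */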