/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tm-constrs.h"
/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */

  /* A 16-byte type.  (Implemented with V16QI_type_node) */

  /* These all correspond to intSI_type_node */

  /* These correspond to the standard types */
#define V16QI_type_node           (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node            (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node            (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node            (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node            (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node            (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node  (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node   (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node   (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node   (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
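/* Illustration (editorial sketch, not in the original source): each
   entry holds the inclusive bounds of an instruction's immediate field.
   SPU_BTI_S10, for example, covers the 10-bit signed field of ai-style
   instructions, so a builtin argument V is in range exactly when
   -0x200 <= V && V <= 0x1ff, i.e. when it satisfies a check of the form

     V >= spu_builtin_range[r].low && V <= spu_builtin_range[r].high

   for the corresponding range index r. */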
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs. */
static void spu_init_builtins (void);
static tree spu_builtin_decl (unsigned, bool);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
                                                 bool, addr_space_t);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
                               rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
                                  int distance, sbitmap blocks);
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
                                    enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
                                     int more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static void spu_sched_init_global (FILE *, int, int);
static void spu_sched_init (FILE *, int, int);
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
                                         int flags,
                                         unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
                                         int flags,
                                         unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                                            const_tree type, unsigned char named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
                                      gimple_seq * pre_p, gimple_seq * post_p);
static int store_with_one_insn_p (rtx mem);
static int mem_is_padded_component_ref (rtx x);
static int reg_aligned_for_addr (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
                                    int *total, bool speed);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
                                              addr_space_t);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
static enum machine_mode spu_addr_space_address_mode (addr_space_t);
static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
static rtx spu_addr_space_convert (rtx, tree, tree);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);
static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
static void spu_unique_section (tree, int);
static rtx spu_expand_load (rtx, rtx, rtx, int);
static void spu_trampoline_init (rtx, tree, rtx);
extern const char *reg_names[];

/* Which instruction set architecture to use. */
int spu_arch;

/* Which cpu are we tuning for. */
int spu_tune;

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);
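/* Worked example: with the default of 2 permitted nops this is
   (8*4) - (2*4) = 24 bytes, i.e. 6 four-byte insns.  Once 24 bytes
   separate the hint from its branch, at most one dual-issued nop pair
   (8 bytes) is needed to reach the required 8-insn (32-byte)
   distance. */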
/* Determines whether we run variable tracking in machine dependent
   reorg. */
static int spu_flag_var_tracking;

enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
                                                enum machine_mode mode);

static enum machine_mode spu_unwind_word_mode (void);

static enum machine_mode
spu_libgcc_cmp_return_mode (void);

static enum machine_mode
spu_libgcc_shift_count_mode (void);

/* Pointer mode for __ea references. */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
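/* So under the 64-bit __ea model (-mea64, spu_ea_model == 64) an __ea
   pointer is DImode, and under the 32-bit model (-mea32) it is
   SImode. */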
/* Table of machine attributes. */
static const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,             0, 0, false, false, false, NULL }
};
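/* Illustrative usage (editorial sketch, not from this file):

     void trap_handler (void) __attribute__ ((naked));
     typedef int vec_int __attribute__ ((spu_vector));

   "naked" requires a function declaration (decl_req is true), while
   "spu_vector" applies to types (type_req is true). */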
/* TARGET overrides. */

#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed. */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START asm_file_start

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

struct gcc_target targetm = TARGET_INITIALIZER;
void
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params. */
  MAX_PENDING_LIST_LENGTH = 128;

  /* With so many registers this is better on by default. */
  flag_rename_registers = 1;
}

/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed. */
void
spu_override_options (void)
{
  /* Small loops will be completely unrolled at -O3.  For SPU it is more
     important to keep code small by default. */
  if (!flag_unroll_loops && !flag_peel_loops
      && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue. */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);
  /* Determine processor architectural level. */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
        spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
        spu_arch = PROCESSOR_CELLEDP;
      else
        error ("Unknown architecture '%s'", &spu_arch_string[0]);
    }

  /* Determine processor to tune for. */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
        spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
        spu_tune = PROCESSOR_CELLEDP;
      else
        error ("Unknown architecture '%s'", &spu_tune_string[0]);
    }

  /* Change defaults according to the processor architecture. */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
         the default to -mno-safe-hints on celledp -- only the original
         Cell/B.E. processors require this workaround. */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
        target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported. */
static unsigned char
spu_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case SFmode:
    case DImode:
    case DFmode:
    case TImode:
      return true;

    default:
      return false;
    }
}

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details. */
static unsigned char
spu_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return true;

    default:
      return false;
    }
}
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct. */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
        || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
        || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
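/* Examples for the predicate above: (subreg:SI (reg:HI) 0) is valid
   (both sizes <= 4 bytes), as is a 16-byte pair such as
   (subreg:V4SI (reg:TI) 0); but (subreg:DI (reg:SI) 0) is rejected
   because the 4-byte inner value does not occupy the low bytes of the
   8-byte view on this target. */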
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset. */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;
  /* Strip any paradoxical SUBREG. */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
          > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
        *start -=
          GET_MODE_BITSIZE (GET_MODE (op)) -
          GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, assure a SUBREG. */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      gcc_assert (GET_MODE_CLASS (GET_MODE (op)) == MODE_INT);
      *start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

  if (GET_CODE (src) == MEM)
    {
      /* First, determine if we need 1 TImode load or 2.  We need only 1
         if the bits being extracted do not cross the alignment boundary
         as determined by the MEM and its address. */
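      /* Illustrative: with MEM_ALIGN (src) == 128, align_mask is -128,
         so one load suffices exactly when START and START+WIDTH-1 agree
         above their low 7 bits.  E.g. start 40, width 32 stays inside
         one quadword; start 120, width 16 crosses into the next
         quadword and takes the two-load path below. */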
      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
        {
          /* Alignment is sufficient for 1 load. */
          s0 = gen_reg_rtx (TImode);
          r0 = spu_expand_load (s0, 0, src, start / 8);
          start &= 7;
          if (r0)
            emit_insn (gen_rotqby_ti (s0, s0, r0));
        }
      else
        {
          /* Need 2 loads. */
          s0 = gen_reg_rtx (TImode);
          s1 = gen_reg_rtx (TImode);
          r0 = spu_expand_load (s0, s1, src, start / 8);
          start &= 7;

          gcc_assert (start + width <= 128);
          if (r0)
            {
              rtx r1 = gen_reg_rtx (SImode);
              mask = gen_reg_rtx (TImode);
              emit_move_insn (mask, GEN_INT (-1));
              emit_insn (gen_rotqby_ti (s0, s0, r0));
              emit_insn (gen_rotqby_ti (s1, s1, r0));
              if (GET_CODE (r0) == CONST_INT)
                r1 = GEN_INT (INTVAL (r0) & 15);
              else
                emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
              emit_insn (gen_shlqby_ti (mask, mask, r1));
              emit_insn (gen_selb (s0, s1, s0, mask));
            }
        }
    }
  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
        emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
      else
        emit_move_insn (s0, src);
    }
  else
    {
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);
    }

  /* Now s0 is TImode and contains the bits to extract at start. */

  if (start)
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  if (128 - width)
    {
      tree c = build_int_cst (NULL_TREE, 128 - width);
      s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
    }

  emit_move_insn (dst, s0);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
        (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);
  if (shift > 0)
    {
      switch (dst_mode)
        {
        case SImode:
          emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
          break;
        case DImode:
          emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
          break;
        case TImode:
          emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
          break;
        default:
          abort ();
        }
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
        maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
        maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
        unsigned char arr[16];
        int i = start / 8;
        memset (arr, 0, sizeof (arr));
        arr[i] = 0xff >> (start & 7);
        for (i++; i <= (start + width - 1) / 8; i++)
          arr[i] = 0xff;
        arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
        emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }
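  /* Worked example for the 128-bit case above: start = 12, width = 8
     gives i = 1, arr[1] = 0xff >> 4 = 0x0f, arr[2] = 0xff, and the
     final trim arr[2] &= 0xff << 4 leaves 0xf0, so exactly mask bits
     12..19 are set. */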
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx addr;
      rtx addr0;
      rtx addr1;
      rtx mem;

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
        {
          rtx shl = gen_reg_rtx (SImode);
          rtx mask1 = gen_reg_rtx (TImode);
          rtx dst1 = gen_reg_rtx (TImode);
          rtx mem1;
          addr1 = plus_constant (addr, 16);
          addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
          emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
          emit_insn (gen_shlqby_ti (mask1, mask, shl));
          mem1 = change_address (ops[0], TImode, addr1);
          set_mem_alias_set (mem1, 0);
          emit_move_insn (dst1, mem1);
          emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
          emit_move_insn (mem1, dst1);
        }
      emit_move_insn (mem, dst);
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  dst = ops[0];
  src = ops[1];

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
        {
          dst = adjust_address (ops[0], V16QImode, offset);
          src = adjust_address (ops[1], V16QImode, offset);
          emit_move_insn (dst, src);
        }
      if (bytes < offset + 16)
        {
          rtx mask;
          unsigned char arr[16] = { 0 };
          for (i = 0; i < bytes - offset; i++)
            arr[i] = 0xff;
          dst = adjust_address (ops[0], V16QImode, offset);
          src = adjust_address (ops[1], V16QImode, offset);
          mask = gen_reg_rtx (V16QImode);
          sreg = gen_reg_rtx (V16QImode);
          dreg = gen_reg_rtx (V16QImode);
          target = gen_reg_rtx (V16QImode);
          emit_move_insn (mask, array_to_constant (V16QImode, arr));
          emit_move_insn (dreg, dst);
          emit_move_insn (sreg, src);
          emit_insn (gen_selb (target, dreg, sreg, mask));
          emit_move_insn (dst, target);
        }
      return 1;
    }
  return 0;
}
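/* Note on the tail handling above: selb computes
   target = (dreg & ~mask) | (sreg & mask) bitwise, so the leading 0xff
   bytes in ARR copy the remaining (bytes - offset) source bytes while
   the zero bytes preserve the destination's original contents. */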
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, so we can generate better code in most cases if we do it
   ourselves. */
void
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);
  int index;
  int eq_test = 0;

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
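  /* For instance, (ge:SI x (const_int 5)) is rewritten as
     (gt:SI x (const_int 4)), which the cgti-style immediate compare
     patterns can use directly. */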
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
  op_mode = GET_MODE (op0);

  if (HONOR_NANS (op_mode))

  if (HONOR_NANS (op_mode))

  reverse_compare = 0;
  reverse_compare = 1;
  reverse_compare = 1;
  reverse_compare = 0;
  reverse_compare = 1;
  reverse_compare = 0;

  comp_mode = op_mode;
  comp_mode = op_mode;
  comp_mode = op_mode;
  comp_mode = V4SImode;
  comp_mode = V2DImode;
  if (GET_MODE (op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && op1 == const0_rtx
      && (GET_MODE (op0) == SImode
          || GET_MODE (op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
        {
          rtx t = op1;
          op1 = op0;
          op0 = t;
        }

      if (spu_comp_icode[index][scode] == 0)
        abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
          (op0, op_mode))
        op0 = force_reg (op_mode, op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
          (op1, op_mode))
        op1 = force_reg (op_mode, op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
                                                         op0, op1);
      if (comp_rtx == 0)
        abort ();
      emit_insn (comp_rtx);

      if (eq_test)
        {
          eq_result = gen_reg_rtx (comp_mode);
          eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
                                                             op0, op1);
          if (eq_rtx == 0)
            abort ();
          emit_insn (eq_rtx);
          ior_code = ior_optab->handlers[(int) comp_mode].insn_code;
          gcc_assert (ior_code != CODE_FOR_nothing);
          emit_insn (GEN_FCN (ior_code)
                     (compare_result, compare_result, eq_result));
        }
    }
  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
        {
          rtx old_res = compare_result;
          compare_result = gen_reg_rtx (HImode);
          comp_mode = HImode;
          emit_insn (gen_extendqihi2 (compare_result, old_res));
        }

      if (reverse_test)
        bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
        bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
                                   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
                                                         loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      rtx target = operands[0];
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
        {
          select_mask = gen_reg_rtx (mode);
          emit_insn (gen_extend_compare (select_mask, compare_result));
        }
      else if (target_size < compare_size)
        select_mask =
          gen_rtx_SUBREG (mode, compare_result,
                          (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
        select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
        select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
          || GET_MODE (target) != GET_MODE (op_f))
        abort ();

      if (reverse_test)
        emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
        emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      rtx target = operands[0];
      if (reverse_test)
        emit_insn (gen_rtx_SET (VOIDmode, compare_result,
                                gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
        emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
               && GET_MODE (compare_result) == QImode)
        emit_insn (gen_extend_compare (target, compare_result));
      else
        emit_move_insn (target, compare_result);
    }
}
HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
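/* Illustrative round trip: hwint_to_const_double (SFmode, 0x3f800000)
   builds the CONST_DOUBLE for 1.0f from its bit pattern, and
   const_double_to_hwint maps that CONST_DOUBLE back to 0x3f800000. */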
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
        {
          fprintf (file, "%s,%s", reg_names[REGNO (reg)],
                   reg_names[REGNO (offset)]);
        }
      else if (GET_CODE (offset) == CONST_INT)
        {
          fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
                   INTVAL (offset), reg_names[REGNO (reg)]);
        }
      else
        abort ();
      break;

    default:
      output_addr_const (file, addr);
      break;
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':		/* 128 bits, signed */
      case 'm':		/* 128 bits, signed */
      case 'T':		/* 128 bits, signed */
      case 't':		/* 128 bits, signed */
        mode = TImode;
        break;
      case 'K':		/* 64 bits, signed */
      case 'k':		/* 64 bits, signed */
      case 'D':		/* 64 bits, signed */
      case 'd':		/* 64 bits, signed */
        mode = DImode;
        break;
      case 'J':		/* 32 bits, signed */
      case 'j':		/* 32 bits, signed */
      case 's':		/* 32 bits, signed */
      case 'S':		/* 32 bits, signed */
        mode = SImode;
        break;
      }
  switch (code)
    {
    case 'j':		/* 32 bits, signed */
    case 'k':		/* 64 bits, signed */
    case 'm':		/* 128 bits, signed */
      if (xcode == CONST_INT
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
        {
          gcc_assert (logical_immediate_p (x, mode));
          constant_to_array (mode, x, arr);
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
          val = trunc_int_for_mode (val, SImode);
          switch (which_logical_immediate (val))
            {
            case SPU_ORI:
              break;
            case SPU_ORHI:
              fprintf (file, "h");
              break;
            case SPU_ORBI:
              fprintf (file, "b");
              break;
            default:
              gcc_unreachable ();
            }
        }
      else
        gcc_unreachable ();
      return;
    case 'J':		/* 32 bits, signed */
    case 'K':		/* 64 bits, signed */
    case 'L':		/* 128 bits, signed */
      if (xcode == CONST_INT
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
        {
          gcc_assert (logical_immediate_p (x, mode)
                      || iohl_immediate_p (x, mode));
          constant_to_array (mode, x, arr);
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
          val = trunc_int_for_mode (val, SImode);
          switch (which_logical_immediate (val))
            {
            case SPU_ORI:
            case SPU_IOHL:
              break;
            case SPU_ORHI:
              val = trunc_int_for_mode (val, HImode);
              break;
            case SPU_ORBI:
              val = trunc_int_for_mode (val, QImode);
              break;
            default:
              gcc_unreachable ();
            }
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        gcc_unreachable ();
      return;
    case 't':		/* 128 bits, signed */
    case 'd':		/* 64 bits, signed */
    case 's':		/* 32 bits, signed */
      if (CONSTANT_P (x))
        {
          enum immediate_class c = classify_immediate (x, mode);
          switch (c)
            {
            case IC_IL1:
              constant_to_array (mode, x, arr);
              val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
              val = trunc_int_for_mode (val, SImode);
              switch (which_immediate_load (val))
                {
                case SPU_IL:
                  break;
                case SPU_ILA:
                  fprintf (file, "a");
                  break;
                case SPU_ILH:
                  fprintf (file, "h");
                  break;
                case SPU_ILHU:
                  fprintf (file, "hu");
                  break;
                default:
                  gcc_unreachable ();
                }
              break;
            case IC_CPAT:
              constant_to_array (mode, x, arr);
              cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
              if (info == 1)
                fprintf (file, "b");
              else if (info == 2)
                fprintf (file, "h");
              else if (info == 4)
                fprintf (file, "w");
              else if (info == 8)
                fprintf (file, "d");
              break;
            case IC_IL1s:
              if (xcode == CONST_VECTOR)
                {
                  x = CONST_VECTOR_ELT (x, 0);
                  xcode = GET_CODE (x);
                }
              if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
                fprintf (file, "a");
              else if (xcode == HIGH)
                fprintf (file, "hu");
              break;
            case IC_FSMBI:
            case IC_FSMBI2:
            case IC_IL2:
            case IC_IL2s:
            case IC_POOL:
              abort ();
            }
        }
      else
        gcc_unreachable ();
      return;
    case 'T':		/* 128 bits, signed */
    case 'D':		/* 64 bits, signed */
    case 'S':		/* 32 bits, signed */
      if (CONSTANT_P (x))
        {
          enum immediate_class c = classify_immediate (x, mode);
          switch (c)
            {
            case IC_IL1:
              constant_to_array (mode, x, arr);
              val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
              val = trunc_int_for_mode (val, SImode);
              switch (which_immediate_load (val))
                {
                case SPU_IL:
                case SPU_ILA:
                  break;
                case SPU_ILH:
                case SPU_ILHU:
                  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
                  break;
                default:
                  gcc_unreachable ();
                }
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
              break;
            case IC_FSMBI:
              constant_to_array (mode, x, arr);
              val = 0;
              for (i = 0; i < 16; i++)
                {
                  val <<= 1;
                  val |= arr[i] & 1;
                }
              print_operand (file, GEN_INT (val), 0);
              break;
            case IC_CPAT:
              constant_to_array (mode, x, arr);
              cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
              break;
            case IC_IL1s:
              if (xcode == HIGH)
                x = XEXP (x, 0);
              if (GET_CODE (x) == CONST_VECTOR)
                x = CONST_VECTOR_ELT (x, 0);
              output_addr_const (file, x);
              if (xcode == HIGH)
                fprintf (file, "@h");
              break;
            default:
              gcc_unreachable ();
            }
        }
      return;
      if (xcode == CONST_INT)
        {
          /* Only 4 least significant bits are relevant for generate
             control word instructions. */
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
        }
      return;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
        switch (INTVAL (x))
          {
          case 1:
            fprintf (file, "b");
            break;
          case 2:
            fprintf (file, "h");
            break;
          case 4:
            fprintf (file, "w");
            break;
          case 8:
            fprintf (file, "d");
            break;
          default:
            gcc_unreachable ();
          }
      else
        gcc_unreachable ();
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
                 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
        fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;
    case 'b':			/* branch modifiers */
      if (xcode == REG)
        fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
        fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
        {
          if (GET_CODE (XEXP (x, 0)) == REG)
            /* Used in indirect function calls. */
            fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
          else
            output_address (XEXP (x, 0));
        }
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
        {
          x = XEXP (x, 0);
          xcode = GET_CODE (x);
        }
      if (xcode == AND)
        {
          x = XEXP (x, 0);
          xcode = GET_CODE (x);
        }
      if (xcode == REG)
        fprintf (file, "d");
      else if (xcode == CONST_INT)
        fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
        fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
        {
          if (GET_CODE (XEXP (x, 1)) == REG)
            fprintf (file, "x");
          else
            fprintf (file, "d");
        }
      return;
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'v':
    case 'w':
      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
      return;
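      /* Illustrative: for element value 1.0f (bytes 3f 80 00 00) the
         computation above gives ((0x3f << 1) + 1) & 0xff = 127;
         subtracting the IEEE single-precision bias 127 prints exponent
         0, negated when the code is 'w'. */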
    case 0:
      if (xcode == REG)
        fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
        output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
        print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
        output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}
extern char call_used_regs[];

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register. */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
    pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
  return pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction. */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
        unsigned char arrhi[16];
        unsigned char arrlo[16];
        rtx to, temp, hi, lo;
        int i;
        enum machine_mode imode = mode;
        /* We need to do reals as ints because the constant used in the
           IOR might not be a legitimate real constant. */
        imode = int_mode_for_mode (mode);
        constant_to_array (mode, ops[1], arrhi);
        if (imode != mode)
          to = simplify_gen_subreg (imode, ops[0], mode, 0);
        else
          to = ops[0];
        temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
        for (i = 0; i < 16; i += 4)
          {
            arrlo[i + 2] = arrhi[i + 2];
            arrlo[i + 3] = arrhi[i + 3];
            arrlo[i + 0] = arrlo[i + 1] = 0;
            arrhi[i + 2] = arrhi[i + 3] = 0;
          }
        hi = array_to_constant (imode, arrhi);
        lo = array_to_constant (imode, arrlo);
        emit_move_insn (temp, hi);
        emit_insn (gen_rtx_SET
                   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
        return 1;
      }
    case IC_FSMBI2:
      {
        unsigned char arr_fsmbi[16];
        unsigned char arr_andbi[16];
        rtx to, reg_fsmbi, reg_and;
        int i;
        enum machine_mode imode = mode;
        /* We need to do reals as ints because the constant used in the
           AND might not be a legitimate real constant. */
        imode = int_mode_for_mode (mode);
        constant_to_array (mode, ops[1], arr_fsmbi);
        if (imode != mode)
          to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
        else
          to = ops[0];
        for (i = 0; i < 16; i++)
          if (arr_fsmbi[i] != 0)
            {
              arr_andbi[0] = arr_fsmbi[i];
              arr_fsmbi[i] = 0xff;
            }
        for (i = 1; i < 16; i++)
          arr_andbi[i] = arr_andbi[0];
        reg_fsmbi = array_to_constant (imode, arr_fsmbi);
        reg_and = array_to_constant (imode, arr_andbi);
        emit_move_insn (to, reg_fsmbi);
        emit_insn (gen_rtx_SET
                   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
        return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
        {
          rtx mem = force_const_mem (mode, ops[1]);
          if (TARGET_LARGE_MEM)
            {
              rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
              emit_move_insn (addr, XEXP (mem, 0));
              mem = replace_equiv_address (mem, addr);
            }
          emit_move_insn (ops[0], mem);
          return 1;
        }
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
        {
          if (c == IC_IL2s)
            {
              emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
              emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
            }
          else if (flag_pic)
            emit_insn (gen_pic (ops[0], ops[1]));
          if (flag_pic)
            {
              rtx pic_reg = get_pic_reg ();
              emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
              crtl->uses_pic_offset_table = 1;
            }
          return flag_pic || c == IC_IL2s;
        }
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || crtl->uses_pic_offset_table)
      && (!saving
          || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation. */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}

/* This happens after reload, so we need to expand it. */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
        abort ();
    }
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue. */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
          && (spu_saved_regs_size ()
              + get_frame_size ()
              + crtl->outgoing_args_size
              + crtl->args.pretend_args_size == 0)
          && current_function_is_leaf)
        return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:
         +-------------+
         |  incoming   |
         |    args     |
   AP -> +-------------+
         | $lr save    |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  var args   |
         |  reg save   | crtl->args.pretend_args_size bytes
         +-------------+
         |    ...      |
         | saved regs  | spu_saved_regs_size() bytes
   FP -> +-------------+
         |    ...      |
         |   vars      | get_frame_size() bytes
  HFP -> +-------------+
         |    ...      |
         |  outgoing   |
         |    args     | crtl->outgoing_args_size bytes
         +-------------+
         | $lr of next |
         |   frame     |
         +-------------+
         | back chain  |
   SP -> +-------------+
*/
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (flag_pic && optimize == 0)
    crtl->uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (total_size > 0)
    {
      offset = -crtl->args.pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
        if (need_to_save_reg (regno, 1))
          {
            offset -= 16;
            insn = frame_emit_store (regno, sp_reg, offset);
            RTX_FRAME_RELATED_P (insn) = 1;
          }
    }

  if (flag_pic && crtl->uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
        {
          /* We compare against total_size-1 because
             ($sp >= total_size) <=> ($sp > total_size-1) */
          rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
          rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
          rtx size_v4si = spu_const (V4SImode, total_size - 1);
          if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
            {
              emit_move_insn (scratch_v4si, size_v4si);
              size_v4si = scratch_v4si;
            }
          emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
          emit_insn (gen_vec_extractv4si
                     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
          emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
        }

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain. */
      if (total_size <= 2000)
        {
          /* In this case we save the back chain first. */
          insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
          insn =
            frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
        }
      else
        {
          insn = emit_move_insn (scratch_reg_0, sp_reg);
          insn =
            frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
        }
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);

      if (total_size > 2000)
        {
          /* Save the back chain ptr */
          insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
        }

      if (frame_pointer_needed)
        {
          rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
          HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
            + crtl->outgoing_args_size;
          /* Set the new frame_pointer */
          insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
          RTX_FRAME_RELATED_P (insn) = 1;
          real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
          REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
        }
    }

  emit_note (NOTE_INSN_DELETED);
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (cfun->calls_alloca)
        frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
        frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
        {
          offset = -crtl->args.pretend_args_size;
          for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
            if (need_to_save_reg (regno, 1))
              {
                offset -= 0x10;
                frame_emit_load (regno, sp_reg, offset);
              }
        }
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);
    }

  emit_note (NOTE_INSN_DELETED);
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
              || GET_MODE_CLASS (mode) == MODE_FLOAT
              || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
              || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
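/* Illustrative uses: spu_const (V4SImode, 1) produces the splat vector
   (const_vector:V4SI [1 1 1 1]), while spu_const (SImode, -1) is simply
   (const_int -1). */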
/* Create a MODE vector constant from 4 ints. */
rtx
spu_const_from_ints (enum machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant (mode, arr);
}
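/* Illustrative: spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
   0x08090a0b, 0x0c0d0e0f) produces the byte sequence 00 01 02 ... 0f,
   i.e. each int supplies one 32-bit element, most significant byte
   first. */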
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  int bb_index;			/* the original block. */
};

static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
  (GET_CODE(INSN) == CALL_INSN \
   || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
   || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily. */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
   ADDR_VEC insns. */
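/* On the SPU, "nop" issues on the even (arithmetic) pipeline and "lnop"
   on the odd (load/store/branch) pipeline, so the filler chosen below is
   the one that can dual issue with INSN. */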
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop. */
static void
pad_bb (void)
{
  rtx insn, next_insn, prev_insn, hbr_insn = 0;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES. */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops. */
  length = 0;

  prev_insn = 0;
  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
          || INSN_CODE (insn) == CODE_FOR_hbr)
        {
          if (hbr_insn)
            {
              int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
              int a1 = INSN_ADDRESSES (INSN_UID (insn));
              if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
                  || (a1 - a0 == 4))
                {
                  prev_insn = emit_insn_before (gen_lnop (), insn);
                  PUT_MODE (prev_insn, GET_MODE (insn));
                  PUT_MODE (insn, TImode);
                  length += 4;
                }
            }
          hbr_insn = insn;
        }
      if (INSN_CODE (insn) == CODE_FOR_blockage)
        {
          if (GET_MODE (insn) == TImode)
            PUT_MODE (next_insn, TImode);
          insn = next_insn;
          next_insn = next_active_insn (insn);
        }
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
        {
          if (((addr + length) & 7) != 0)
            {
              emit_nop_for_insn (prev_insn);
              length += 4;
            }
        }
      else if (GET_MODE (insn) == TImode
               && ((next_insn && GET_MODE (next_insn) != TImode)
                   || get_attr_type (insn) == TYPE_MULTI0)
               && ((addr + length) & 7) != 0)
        {
          /* prev_insn will always be set because the first insn is
             always 8-byte aligned. */
          emit_nop_for_insn (prev_insn);
          length += 4;
        }
      prev_insn = insn;
    }
}
/* Routines for branch hints. */

static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target,
                      int distance, sbitmap blocks)
{
  rtx branch_label = 0;
  rtx hint;
  rtx insn;
  rtx table;

  if (before == 0 || branch == 0 || target == 0)
    return;

  /* While scheduling we require hints to be no further than 600, so
     we need to enforce that here too. */
  if (distance > 600)
    return;

  /* If we have a Basic block note, emit it after the basic block note. */
  if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
    before = NEXT_INSN (before);

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
  SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);

  hint = emit_insn_before (gen_hbr (branch_label, target), before);
  recog_memoized (hint);
  HINTED_P (branch) = 1;

  if (GET_CODE (target) == LABEL_REF)
    HINTED_P (XEXP (target, 0)) = 1;
  else if (tablejump_p (branch, 0, &table))
    {
      rtvec vec;
      int j;
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
        vec = XVEC (PATTERN (table), 0);
      else
        vec = XVEC (PATTERN (table), 1);
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
        HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
    }

  if (distance >= 588)
    {
      /* Make sure the hint isn't scheduled any earlier than this point,
         which could make it too far for the branch offset to fit. */
      recog_memoized (emit_insn_before (gen_blockage (), hint));
    }
  else if (distance <= 8 * 4)
    {
      /* To guarantee at least 8 insns between the hint and branch we
         insert nops. */
      int d;
      for (d = distance; d < 8 * 4; d += 4)
        {
          insn =
            emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
          recog_memoized (insn);
        }

      /* Make sure any nops inserted aren't scheduled before the hint. */
      recog_memoized (emit_insn_after (gen_blockage (), hint));

      /* Make sure any nops inserted aren't scheduled after the call. */
      if (CALL_P (branch) && distance < 8 * 4)
        recog_memoized (emit_insn_before (gen_blockage (), branch));
    }
}
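/* Illustrative shape of the result (assumed assembly, for exposition
   only, not emitted verbatim by this function):

       hbr     .L10, .Ltarget   # hint: the branch at .L10 goes to .Ltarget
       nop     $127
       nop     $127             # pad the hint-to-branch distance
       ...
   .L10:
       brnz    $3, .Ltarget

   with blockage insns keeping the scheduler from moving the hint or the
   padding nops. */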
2402 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2403 the rtx for the branch target. */
2405 get_branch_target (rtx branch)
2407 if (GET_CODE (branch) == JUMP_INSN)
2411 /* Return statements */
2412 if (GET_CODE (PATTERN (branch)) == RETURN)
2413 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2416 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2417 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2421 if (extract_asm_operands (PATTERN (branch)) != NULL)
2424 set = single_set (branch);
2425 src = SET_SRC (set);
2426 if (GET_CODE (SET_DEST (set)) != PC)
2429 if (GET_CODE (src) == IF_THEN_ELSE)
2432 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2435 /* If the more probable case is not a fall through, then
2436 try a branch hint. */
2437 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2438 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2439 && GET_CODE (XEXP (src, 1)) != PC)
2440 lab = XEXP (src, 1);
2441 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2442 && GET_CODE (XEXP (src, 2)) != PC)
2443 lab = XEXP (src, 2);
2447 if (GET_CODE (lab) == RETURN)
2448 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2456 else if (GET_CODE (branch) == CALL_INSN)
2459 /* All of our call patterns are in a PARALLEL and the CALL is
2460 the first pattern in the PARALLEL. */
2461 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2463 call = XVECEXP (PATTERN (branch), 0, 0);
2464 if (GET_CODE (call) == SET)
2465 call = SET_SRC (call);
2466 if (GET_CODE (call) != CALL)
2468 return XEXP (XEXP (call, 0), 0);
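/* For illustration: given a call pattern along the lines of
   (parallel [(set (reg:SI 3) (call (mem:QI (symbol_ref "f")) ...)) ...])
   the code above unwraps the SET and the CALL and returns the address
   inside the MEM, here (symbol_ref "f").  */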
2473 /* The special $hbr register is used to prevent the insn scheduler from
2474 moving hbr insns across instructions which invalidate them. It
2475 should only be used in a clobber, and this function searches for
2476 insns which clobber it. */
2478 insn_clobbers_hbr (rtx insn)
2481 && GET_CODE (PATTERN (insn)) == PARALLEL)
2483 rtx parallel = PATTERN (insn);
2486 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2488 clobber = XVECEXP (parallel, 0, j);
2489 if (GET_CODE (clobber) == CLOBBER
2490 && GET_CODE (XEXP (clobber, 0)) == REG
2491 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2498 /* Search up to 32 insns starting at FIRST:
2499 - at any kind of hinted branch, just return
2500 - at any unconditional branch in the first 15 insns, just return
2501 - at a call or indirect branch, after the first 15 insns, force it to
2502 an even address and return
2503 - at any unconditional branch, after the first 15 insns, force it to an even address and return
2505 At the end of the search, insert an hbrp within 4 insns of FIRST,
2506 and an hbrp within 16 instructions of FIRST.
2509 insert_hbrp_for_ilb_runout (rtx first)
2511 rtx insn, before_4 = 0, before_16 = 0;
2512 int addr = 0, length, first_addr = -1;
2513 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2514 int insert_lnop_after = 0;
2515 for (insn = first; insn; insn = NEXT_INSN (insn))
2518 if (first_addr == -1)
2519 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2520 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2521 length = get_attr_length (insn);
2523 if (before_4 == 0 && addr + length >= 4 * 4)
2525 /* We test for 14 instructions because the first hbrp will add
2526 up to 2 instructions. */
2527 if (before_16 == 0 && addr + length >= 14 * 4)
2530 if (INSN_CODE (insn) == CODE_FOR_hbr)
2532 /* Make sure an hbrp is at least 2 cycles away from a hint.
2533 Insert an lnop after the hbrp when necessary. */
2534 if (before_4 == 0 && addr > 0)
2537 insert_lnop_after |= 1;
2539 else if (before_4 && addr <= 4 * 4)
2540 insert_lnop_after |= 1;
2541 if (before_16 == 0 && addr > 10 * 4)
2544 insert_lnop_after |= 2;
2546 else if (before_16 && addr <= 14 * 4)
2547 insert_lnop_after |= 2;
2550 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2552 if (addr < hbrp_addr0)
2554 else if (addr < hbrp_addr1)
2558 if (CALL_P (insn) || JUMP_P (insn))
2560 if (HINTED_P (insn))
2563 /* Any branch after the first 15 insns should be on an even
2564 address to avoid a special case branch. There might be
2565 some nops and/or hbrps inserted, so we test after 10 insns. */
2568 SCHED_ON_EVEN_P (insn) = 1;
2571 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2575 if (addr + length >= 32 * 4)
2577 gcc_assert (before_4 && before_16);
2578 if (hbrp_addr0 > 4 * 4)
2581 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2582 recog_memoized (insn);
2583 INSN_ADDRESSES_NEW (insn,
2584 INSN_ADDRESSES (INSN_UID (before_4)));
2585 PUT_MODE (insn, GET_MODE (before_4));
2586 PUT_MODE (before_4, TImode);
2587 if (insert_lnop_after & 1)
2589 insn = emit_insn_before (gen_lnop (), before_4);
2590 recog_memoized (insn);
2591 INSN_ADDRESSES_NEW (insn,
2592 INSN_ADDRESSES (INSN_UID (before_4)));
2593 PUT_MODE (insn, TImode);
2596 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2597 && hbrp_addr1 > 16 * 4)
2600 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2601 recog_memoized (insn);
2602 INSN_ADDRESSES_NEW (insn,
2603 INSN_ADDRESSES (INSN_UID (before_16)));
2604 PUT_MODE (insn, GET_MODE (before_16));
2605 PUT_MODE (before_16, TImode);
2606 if (insert_lnop_after & 2)
2608 insn = emit_insn_before (gen_lnop (), before_16);
2609 recog_memoized (insn);
2610 INSN_ADDRESSES_NEW (insn,
2611 INSN_ADDRESSES (INSN_UID (before_16)));
2613 PUT_MODE (insn, TImode);
2619 else if (BARRIER_P (insn))
2624 /* The SPU might hang when it executes 48 inline instructions after a
2625 hinted branch jumps to its hinted target. The beginning of a
2626 function and the return from a call might have been hinted, and must
2627 be handled as well. To prevent a hang we insert 2 hbrps. The first
2628 should be within 6 insns of the branch target. The second should be
2629 within 22 insns of the branch target. When determining if hbrps are
2630 necessary, we look for only 32 inline instructions, because up to
2631 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2632 new hbrps, we insert them within 4 and 16 insns of the target. */
2637 if (TARGET_SAFE_HINTS)
2639 shorten_branches (get_insns ());
2640 /* Insert hbrp at beginning of function */
2641 insn = next_active_insn (get_insns ());
2643 insert_hbrp_for_ilb_runout (insn);
2644 /* Insert hbrp after hinted targets. */
2645 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2646 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2647 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2651 static int in_spu_reorg;
2653 /* Insert branch hints. There are no branch optimizations after this
2654 pass, so it's safe to set our branch hints now. */
2656 spu_machine_dependent_reorg (void)
2661 rtx branch_target = 0;
2662 int branch_addr = 0, insn_addr, required_dist = 0;
2666 if (!TARGET_BRANCH_HINTS || optimize == 0)
2668 /* We still do it for unoptimized code because an external
2669 function might have hinted a call or return. */
2675 blocks = sbitmap_alloc (last_basic_block);
2676 sbitmap_zero (blocks);
2679 compute_bb_for_insn ();
2684 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2685 sizeof (struct spu_bb_info));
2687 /* We need exact insn addresses and lengths. */
2688 shorten_branches (get_insns ());
2690 for (i = n_basic_blocks - 1; i >= 0; i--)
2692 bb = BASIC_BLOCK (i);
2694 if (spu_bb_info[i].prop_jump)
2696 branch = spu_bb_info[i].prop_jump;
2697 branch_target = get_branch_target (branch);
2698 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2699 required_dist = spu_hint_dist;
2701 /* Search from end of a block to beginning. In this loop, find
2702 jumps which need a branch hint and emit the hint only when:
2703 - it's an indirect branch and we're at the insn which sets the register
2705 - we're at an insn that will invalidate the hint. e.g., a
2706 call, another hint insn, inline asm that clobbers $hbr, and
2707 some inlined operations (divmodsi4). Don't consider jumps
2708 because they are only at the end of a block and are
2709 considered when we are deciding whether to propagate
2710 - we're getting too far away from the branch. The hbr insns
2711 only have a signed 10-bit offset.
2712 We go back as far as possible so the branch will be considered
2713 for propagation when we get to the beginning of the block. */
2714 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2718 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2720 && ((GET_CODE (branch_target) == REG
2721 && set_of (branch_target, insn) != NULL_RTX)
2722 || insn_clobbers_hbr (insn)
2723 || branch_addr - insn_addr > 600))
2725 rtx next = NEXT_INSN (insn);
2726 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2727 if (insn != BB_END (bb)
2728 && branch_addr - next_addr >= required_dist)
2732 "hint for %i in block %i before %i\n",
2733 INSN_UID (branch), bb->index,
2735 spu_emit_branch_hint (next, branch, branch_target,
2736 branch_addr - next_addr, blocks);
2741 /* JUMP_P will only be true at the end of a block. When
2742 branch is already set it means we've previously decided
2743 to propagate a hint for that branch into this block. */
2744 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2747 if ((branch_target = get_branch_target (insn)))
2750 branch_addr = insn_addr;
2751 required_dist = spu_hint_dist;
2755 if (insn == BB_HEAD (bb))
2761 /* If we haven't emitted a hint for this branch yet, it might
2762 be profitable to emit it in one of the predecessor blocks,
2763 especially for loops. */
2765 basic_block prev = 0, prop = 0, prev2 = 0;
2766 int loop_exit = 0, simple_loop = 0;
2767 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2769 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2770 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2771 prev = EDGE_PRED (bb, j)->src;
2773 prev2 = EDGE_PRED (bb, j)->src;
2775 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2776 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2778 else if (EDGE_SUCC (bb, j)->dest == bb)
2781 /* If this branch is a loop exit then propagate to previous
2782 fallthru block. This catches the cases when it is a simple
2783 loop or when there is an initial branch into the loop. */
2784 if (prev && (loop_exit || simple_loop)
2785 && prev->loop_depth <= bb->loop_depth)
2788 /* If there is only one adjacent predecessor, don't propagate
2789 outside this loop. This loop_depth test isn't perfect, but
2790 I'm not sure the loop_father member is valid at this point. */
2791 else if (prev && single_pred_p (bb)
2792 && prev->loop_depth == bb->loop_depth)
2795 /* If this is the JOIN block of a simple IF-THEN then
2796 propagate the hint to the HEADER block. */
2797 else if (prev && prev2
2798 && EDGE_COUNT (bb->preds) == 2
2799 && EDGE_COUNT (prev->preds) == 1
2800 && EDGE_PRED (prev, 0)->src == prev2
2801 && prev2->loop_depth == bb->loop_depth
2802 && GET_CODE (branch_target) != REG)
2805 /* Don't propagate when:
2806 - this is a simple loop and the hint would be too far
2807 - this is not a simple loop and there are 16 insns in the loop already
2809 - the predecessor block ends in a branch that will be hinted
2811 - the predecessor block ends in an insn that invalidates the hint */
2815 && (bbend = BB_END (prop))
2816 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2817 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2818 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2821 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2822 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2823 bb->index, prop->index, bb->loop_depth,
2824 INSN_UID (branch), loop_exit, simple_loop,
2825 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2827 spu_bb_info[prop->index].prop_jump = branch;
2828 spu_bb_info[prop->index].bb_index = i;
2830 else if (branch_addr - next_addr >= required_dist)
2833 fprintf (dump_file, "hint for %i in block %i before %i\n",
2834 INSN_UID (branch), bb->index,
2835 INSN_UID (NEXT_INSN (insn)));
2836 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2837 branch_addr - next_addr, blocks);
2844 if (!sbitmap_empty_p (blocks))
2845 find_many_sub_basic_blocks (blocks);
2847 /* We have to schedule to make sure alignment is ok. */
2848 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2850 /* The hints need to be scheduled, so call it again. */
2857 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2858 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2860 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2861 between its branch label and the branch. We don't move the
2862 label because GCC expects it at the beginning of the block. */
2863 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2864 rtx label_ref = XVECEXP (unspec, 0, 0);
2865 rtx label = XEXP (label_ref, 0);
2868 for (branch = NEXT_INSN (label);
2869 !JUMP_P (branch) && !CALL_P (branch);
2870 branch = NEXT_INSN (branch))
2871 if (NONJUMP_INSN_P (branch))
2872 offset += get_attr_length (branch);
2874 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
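/* For illustration: if pad_bb inserted a single 4-byte nop between the
   label and its branch, the loop above accumulates OFFSET == 4 and the
   hint's operand becomes the label plus 4, so the hinted address still
   matches the branch itself.  */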
2877 if (spu_flag_var_tracking)
2880 timevar_push (TV_VAR_TRACKING);
2881 variable_tracking_main ();
2882 timevar_pop (TV_VAR_TRACKING);
2883 df_finish_pass (false);
2886 free_bb_for_insn ();
2892 /* Insn scheduling routines, primarily for dual issue. */
2894 spu_sched_issue_rate (void)
2900 uses_ls_unit(rtx insn)
2902 rtx set = single_set (insn);
2904 && (GET_CODE (SET_DEST (set)) == MEM
2905 || GET_CODE (SET_SRC (set)) == MEM))
2914 /* Handle inline asm */
2915 if (INSN_CODE (insn) == -1)
2917 t = get_attr_type (insn);
2942 case TYPE_IPREFETCH:
2950 /* haifa-sched.c has a static variable that keeps track of the current
2951 cycle. It is passed to spu_sched_reorder, and we record it here for
2952 use by spu_sched_variable_issue. It won't be accurate if the
2953 scheduler updates its clock_var between the two calls. */
2954 static int clock_var;
2956 /* This is used to keep track of insn alignment. Set to 0 at the
2957 beginning of each block and increased by the "length" attr of each insn scheduled. */
2959 static int spu_sched_length;
2961 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2962 ready list appropriately in spu_sched_reorder(). */
2963 static int pipe0_clock;
2964 static int pipe1_clock;
2966 static int prev_clock_var;
2968 static int prev_priority;
2970 /* The SPU needs to load the next ilb sometime during the execution of
2971 the previous ilb. There is a potential conflict if every cycle has a
2972 load or store. To avoid the conflict we make sure the load/store
2973 unit is free for at least one cycle during the execution of insns in
2974 the previous ilb. */
2975 static int spu_ls_first;
2976 static int prev_ls_clock;
2979 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2980 int max_ready ATTRIBUTE_UNUSED)
2982 spu_sched_length = 0;
2986 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2987 int max_ready ATTRIBUTE_UNUSED)
2989 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2991 /* When any block might be at least 8-byte aligned, assume they
2992 will all be at least 8-byte aligned to make sure dual issue
2993 works out correctly. */
2994 spu_sched_length = 0;
2996 spu_ls_first = INT_MAX;
3001 prev_clock_var = -1;
3006 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3007 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3011 if (GET_CODE (PATTERN (insn)) == USE
3012 || GET_CODE (PATTERN (insn)) == CLOBBER
3013 || (len = get_attr_length (insn)) == 0)
3016 spu_sched_length += len;
3018 /* Reset on inline asm */
3019 if (INSN_CODE (insn) == -1)
3021 spu_ls_first = INT_MAX;
3026 p = get_pipe (insn);
3028 pipe0_clock = clock_var;
3030 pipe1_clock = clock_var;
3034 if (clock_var - prev_ls_clock > 1
3035 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3036 spu_ls_first = INT_MAX;
3037 if (uses_ls_unit (insn))
3039 if (spu_ls_first == INT_MAX)
3040 spu_ls_first = spu_sched_length;
3041 prev_ls_clock = clock_var;
3044 /* The scheduler hasn't inserted the nop, but we will later on.
3045 Include those nops in spu_sched_length. */
3046 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3047 spu_sched_length += 4;
3048 prev_clock_var = clock_var;
3050 /* more is -1 when called from spu_sched_reorder for new insns
3051 that don't have INSN_PRIORITY. */
3053 prev_priority = INSN_PRIORITY (insn);
3056 /* Always try issuing more insns. spu_sched_reorder will decide
3057 when the cycle should be advanced. */
3061 /* This function is called for both TARGET_SCHED_REORDER and
3062 TARGET_SCHED_REORDER2. */
3064 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3065 rtx *ready, int *nreadyp, int clock)
3067 int i, nready = *nreadyp;
3068 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3073 if (nready <= 0 || pipe1_clock >= clock)
3076 /* Find any rtl insns that don't generate assembly insns and schedule them first. */
3078 for (i = nready - 1; i >= 0; i--)
3081 if (INSN_CODE (insn) == -1
3082 || INSN_CODE (insn) == CODE_FOR_blockage
3083 || (INSN_P (insn) && get_attr_length (insn) == 0))
3085 ready[i] = ready[nready - 1];
3086 ready[nready - 1] = insn;
3091 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3092 for (i = 0; i < nready; i++)
3093 if (INSN_CODE (ready[i]) != -1)
3096 switch (get_attr_type (insn))
3121 case TYPE_IPREFETCH:
3127 /* In the first scheduling phase, schedule loads and stores together
3128 to increase the chance they will get merged during postreload CSE. */
3129 if (!reload_completed && pipe_ls >= 0)
3131 insn = ready[pipe_ls];
3132 ready[pipe_ls] = ready[nready - 1];
3133 ready[nready - 1] = insn;
3137 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3141 /* When we have loads/stores in every cycle of the last 15 insns and
3142 we are about to schedule another load/store, emit an hbrp insn instead. */
3145 && spu_sched_length - spu_ls_first >= 4 * 15
3146 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3148 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3149 recog_memoized (insn);
3150 if (pipe0_clock < clock)
3151 PUT_MODE (insn, TImode);
3152 spu_sched_variable_issue (file, verbose, insn, -1);
3156 /* In general, we want to emit nops to increase dual issue, but dual
3157 issue isn't faster when one of the insns could be scheduled later
3158 without affecting the critical path. We look at INSN_PRIORITY to
3159 make a good guess, but it isn't perfect so -mdual-nops=n can be
3160 used to affect it. */
3161 if (in_spu_reorg && spu_dual_nops < 10)
3163 /* When we are at an even address and we are not issuing nops to
3164 improve scheduling then we need to advance the cycle. */
3165 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3166 && (spu_dual_nops == 0
3169 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3172 /* When at an odd address, schedule the highest priority insn
3173 without considering pipeline. */
3174 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3175 && (spu_dual_nops == 0
3177 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3182 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3183 pipe0 insn in the ready list, schedule it. */
3184 if (pipe0_clock < clock && pipe_0 >= 0)
3185 schedule_i = pipe_0;
3187 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3188 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3190 schedule_i = pipe_1;
3192 if (schedule_i > -1)
3194 insn = ready[schedule_i];
3195 ready[schedule_i] = ready[nready - 1];
3196 ready[nready - 1] = insn;
3202 /* INSN is dependent on DEP_INSN. */
3204 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3208 /* The blockage pattern is used to prevent instructions from being
3209 moved across it and has no cost. */
3210 if (INSN_CODE (insn) == CODE_FOR_blockage
3211 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3214 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3215 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3218 /* Make sure hbrps are spread out. */
3219 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3220 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3223 /* Make sure hints and hbrps are 2 cycles apart. */
3224 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3225 || INSN_CODE (insn) == CODE_FOR_hbr)
3226 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3227 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3230 /* An hbrp has no real dependency on other insns. */
3231 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3232 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3235 /* Assuming that it is unlikely an argument register will be used in
3236 the first cycle of the called function, we reduce the cost for
3237 slightly better scheduling of dep_insn. When not hinted, the
3238 mispredicted branch would hide the cost as well. */
3241 rtx target = get_branch_target (insn);
3242 if (GET_CODE (target) != REG || !set_of (target, insn))
3247 /* And when returning from a function, let's assume the return values
3248 are completed sooner too. */
3249 if (CALL_P (dep_insn))
3252 /* Make sure an instruction that loads from the back chain is scheduled
3253 away from the return instruction so a hint is more likely to get issued. */
3255 if (INSN_CODE (insn) == CODE_FOR__return
3256 && (set = single_set (dep_insn))
3257 && GET_CODE (SET_DEST (set)) == REG
3258 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3261 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3262 scheduler makes every insn in a block anti-dependent on the final
3263 jump_insn. We adjust here so higher cost insns will get scheduled earlier. */
3265 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3266 return insn_cost (dep_insn) - 3;
3271 /* Create a CONST_DOUBLE from a string. */
3273 spu_float_const (const char *string, enum machine_mode mode)
3275 REAL_VALUE_TYPE value;
3276 value = REAL_VALUE_ATOF (string, mode);
3277 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3281 spu_constant_address_p (rtx x)
3283 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3284 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3285 || GET_CODE (x) == HIGH);
3288 static enum spu_immediate
3289 which_immediate_load (HOST_WIDE_INT val)
3291 gcc_assert (val == trunc_int_for_mode (val, SImode));
3293 if (val >= -0x8000 && val <= 0x7fff)
3295 if (val >= 0 && val <= 0x3ffff)
3297 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3299 if ((val & 0xffff) == 0)
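/* For illustration: 0x1234 fits the signed 16-bit il range (SPU_IL);
   0x23456 fits the unsigned 18-bit ila range (SPU_ILA); 0x12341234
   repeats its low halfword (SPU_ILH); 0x12340000 has a zero low
   halfword (SPU_ILHU).  A value like 0x12345678 matches none of these
   and needs the two-insn ilhu/iohl sequence.  */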
3305 /* Return true when OP can be loaded by one of the il instructions, or
3306 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3308 immediate_load_p (rtx op, enum machine_mode mode)
3310 if (CONSTANT_P (op))
3312 enum immediate_class c = classify_immediate (op, mode);
3313 return c == IC_IL1 || c == IC_IL1s
3314 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3319 /* Return true if the first SIZE bytes of ARR form a constant that can be
3320 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3321 represent the size and offset of the instruction to use. */
3323 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3325 int cpat, run, i, start;
3329 for (i = 0; i < size && cpat; i++)
3337 else if (arr[i] == 2 && arr[i+1] == 3)
3339 else if (arr[i] == 0)
3341 while (arr[i+run] == run && i+run < 16)
3343 if (run != 4 && run != 8)
3348 if ((i & (run-1)) != 0)
3355 if (cpat && (run || size < 16))
3362 *pstart = start == -1 ? 16-run : start;
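/* For illustration: the pattern bytes default to i + 16 (0x10 ... 0x1f)
   with the insertion slot holding 0, 1, ..., run-1, so
   arr = { 0x10 0x11 0x12 0x13  0x00 0x01 0x02 0x03  0x18 ... 0x1f }
   is accepted with run == 4 and start == 4, i.e. a cwd inserting a
   word at offset 4.  */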
3368 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3369 it into a register. MODE is only valid when OP is a CONST_INT. */
3370 static enum immediate_class
3371 classify_immediate (rtx op, enum machine_mode mode)
3374 unsigned char arr[16];
3375 int i, j, repeated, fsmbi, repeat;
3377 gcc_assert (CONSTANT_P (op));
3379 if (GET_MODE (op) != VOIDmode)
3380 mode = GET_MODE (op);
3382 /* A V4SI const_vector with all identical symbols is ok. */
3385 && GET_CODE (op) == CONST_VECTOR
3386 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3387 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3388 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3389 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3390 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3391 op = CONST_VECTOR_ELT (op, 0);
3393 switch (GET_CODE (op))
3397 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3400 /* We can never know if the resulting address fits in 18 bits and can be
3401 loaded with ila. For now, assume the address will not overflow if
3402 the displacement is "small" (fits 'K' constraint). */
3403 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3405 rtx sym = XEXP (XEXP (op, 0), 0);
3406 rtx cst = XEXP (XEXP (op, 0), 1);
3408 if (GET_CODE (sym) == SYMBOL_REF
3409 && GET_CODE (cst) == CONST_INT
3410 && satisfies_constraint_K (cst))
3419 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3420 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3421 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3427 constant_to_array (mode, op, arr);
3429 /* Check that each 4-byte slot is identical. */
3431 for (i = 4; i < 16; i += 4)
3432 for (j = 0; j < 4; j++)
3433 if (arr[j] != arr[i + j])
3438 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3439 val = trunc_int_for_mode (val, SImode);
3441 if (which_immediate_load (val) != SPU_NONE)
3445 /* Any mode of 2 bytes or smaller can be loaded with an il instruction. */
3447 gcc_assert (GET_MODE_SIZE (mode) > 2);
3451 for (i = 0; i < 16 && fsmbi; i++)
3452 if (arr[i] != 0 && repeat == 0)
3454 else if (arr[i] != 0 && arr[i] != repeat)
3457 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3459 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3472 static enum spu_immediate
3473 which_logical_immediate (HOST_WIDE_INT val)
3475 gcc_assert (val == trunc_int_for_mode (val, SImode));
3477 if (val >= -0x200 && val <= 0x1ff)
3479 if (val >= 0 && val <= 0xffff)
3481 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3483 val = trunc_int_for_mode (val, HImode);
3484 if (val >= -0x200 && val <= 0x1ff)
3486 if ((val & 0xff) == ((val >> 8) & 0xff))
3488 val = trunc_int_for_mode (val, QImode);
3489 if (val >= -0x200 && val <= 0x1ff)
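/* For illustration: 0x1f0 fits the signed 10-bit immediate (SPU_ORI);
   0x1234 fits 16 unsigned bits (SPU_IOHL); 0x01010101 repeats the
   halfword 0x0101, which fits 10 signed bits (SPU_ORHI); 0x40404040
   repeats the byte 0x40 (SPU_ORBI).  */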
3496 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or CONST_DOUBLEs. */
3499 const_vector_immediate_p (rtx x)
3502 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3503 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3504 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3505 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3511 logical_immediate_p (rtx op, enum machine_mode mode)
3514 unsigned char arr[16];
3517 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3518 || GET_CODE (op) == CONST_VECTOR);
3520 if (GET_CODE (op) == CONST_VECTOR
3521 && !const_vector_immediate_p (op))
3524 if (GET_MODE (op) != VOIDmode)
3525 mode = GET_MODE (op);
3527 constant_to_array (mode, op, arr);
3529 /* Check that bytes are repeated. */
3530 for (i = 4; i < 16; i += 4)
3531 for (j = 0; j < 4; j++)
3532 if (arr[j] != arr[i + j])
3535 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3536 val = trunc_int_for_mode (val, SImode);
3538 i = which_logical_immediate (val);
3539 return i != SPU_NONE && i != SPU_IOHL;
3543 iohl_immediate_p (rtx op, enum machine_mode mode)
3546 unsigned char arr[16];
3549 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3550 || GET_CODE (op) == CONST_VECTOR);
3552 if (GET_CODE (op) == CONST_VECTOR
3553 && !const_vector_immediate_p (op))
3556 if (GET_MODE (op) != VOIDmode)
3557 mode = GET_MODE (op);
3559 constant_to_array (mode, op, arr);
3561 /* Check that bytes are repeated. */
3562 for (i = 4; i < 16; i += 4)
3563 for (j = 0; j < 4; j++)
3564 if (arr[j] != arr[i + j])
3567 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3568 val = trunc_int_for_mode (val, SImode);
3570 return val >= 0 && val <= 0xffff;
3574 arith_immediate_p (rtx op, enum machine_mode mode,
3575 HOST_WIDE_INT low, HOST_WIDE_INT high)
3578 unsigned char arr[16];
3581 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3582 || GET_CODE (op) == CONST_VECTOR);
3584 if (GET_CODE (op) == CONST_VECTOR
3585 && !const_vector_immediate_p (op))
3588 if (GET_MODE (op) != VOIDmode)
3589 mode = GET_MODE (op);
3591 constant_to_array (mode, op, arr);
3593 if (VECTOR_MODE_P (mode))
3594 mode = GET_MODE_INNER (mode);
3596 bytes = GET_MODE_SIZE (mode);
3597 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3599 /* Check that bytes are repeated. */
3600 for (i = bytes; i < 16; i += bytes)
3601 for (j = 0; j < bytes; j++)
3602 if (arr[j] != arr[i + j])
3606 for (j = 1; j < bytes; j++)
3607 val = (val << 8) | arr[j];
3609 val = trunc_int_for_mode (val, mode);
3611 return val >= low && val <= high;
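/* For illustration: a V4SI constant with every element 12 gives
   ARR = { 0 0 0 12 } repeated four times; the repetition check passes,
   the four bytes assemble to VAL == 12, and the predicate is true
   whenever LOW <= 12 && 12 <= HIGH.  */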
3614 /* TRUE when OP is an immediate and an exact power of 2, and given that
3615 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3616 all entries must be the same. */
3618 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3620 enum machine_mode int_mode;
3622 unsigned char arr[16];
3625 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3626 || GET_CODE (op) == CONST_VECTOR);
3628 if (GET_CODE (op) == CONST_VECTOR
3629 && !const_vector_immediate_p (op))
3632 if (GET_MODE (op) != VOIDmode)
3633 mode = GET_MODE (op);
3635 constant_to_array (mode, op, arr);
3637 if (VECTOR_MODE_P (mode))
3638 mode = GET_MODE_INNER (mode);
3640 bytes = GET_MODE_SIZE (mode);
3641 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3643 /* Check that bytes are repeated. */
3644 for (i = bytes; i < 16; i += bytes)
3645 for (j = 0; j < bytes; j++)
3646 if (arr[j] != arr[i + j])
3650 for (j = 1; j < bytes; j++)
3651 val = (val << 8) | arr[j];
3653 val = trunc_int_for_mode (val, int_mode);
3655 /* Currently, we only handle SFmode. */
3656 gcc_assert (mode == SFmode);
3659 int exp = (val >> 23) - 127;
3660 return val > 0 && (val & 0x007fffff) == 0
3661 && exp >= low && exp <= high;
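/* For illustration: 2.0f is 0x40000000, so the mantissa bits are zero
   and exp == 0x80 - 127 == 1; 1.0f (0x3f800000) gives exp == 0.  Either
   is accepted only when LOW <= exp && exp <= HIGH.  */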
3666 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3669 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3674 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3676 rtx plus = XEXP (x, 0);
3677 rtx op0 = XEXP (plus, 0);
3678 rtx op1 = XEXP (plus, 1);
3679 if (GET_CODE (op1) == CONST_INT)
3683 return (GET_CODE (x) == SYMBOL_REF
3684 && (decl = SYMBOL_REF_DECL (x)) != 0
3685 && TREE_CODE (decl) == VAR_DECL
3686 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3690 - any 32-bit constant (SImode, SFmode)
3691 - any constant that can be generated with fsmbi (any mode)
3692 - a 64-bit constant where the high and low bits are identical
3694 - a 128-bit constant where the four 32-bit words match. */
3696 spu_legitimate_constant_p (rtx x)
3698 if (GET_CODE (x) == HIGH)
3701 /* Reject any __ea qualified reference. These can't appear in
3702 instructions but must be forced to the constant pool. */
3703 if (for_each_rtx (&x, ea_symbol_ref, 0))
3706 /* V4SI with all identical symbols is valid. */
3708 && GET_MODE (x) == V4SImode
3709 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3710 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3711 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3712 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3713 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3714 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3716 if (GET_CODE (x) == CONST_VECTOR
3717 && !const_vector_immediate_p (x))
3722 /* Valid addresses are:
3723 - symbol_ref, label_ref, const
3725 - reg + const_int, where const_int is 16-byte aligned
3726 - reg + reg, alignment doesn't matter
3727 The alignment matters in the reg+const case because lqd and stqd
3728 ignore the 4 least significant bits of the const. We only care about
3729 16-byte modes because the expand phase will change all smaller MEM
3730 references to TImode. */
3732 spu_legitimate_address_p (enum machine_mode mode,
3733 rtx x, bool reg_ok_strict)
3735 int aligned = GET_MODE_SIZE (mode) >= 16;
3737 && GET_CODE (x) == AND
3738 && GET_CODE (XEXP (x, 1)) == CONST_INT
3739 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3741 switch (GET_CODE (x))
3744 return !TARGET_LARGE_MEM;
3748 /* Keep __ea references until reload so that spu_expand_mov can see them in MEMs. */
3750 if (ea_symbol_ref (&x, 0))
3751 return !reload_in_progress && !reload_completed;
3752 return !TARGET_LARGE_MEM;
3755 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3763 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3768 rtx op0 = XEXP (x, 0);
3769 rtx op1 = XEXP (x, 1);
3770 if (GET_CODE (op0) == SUBREG)
3771 op0 = XEXP (op0, 0);
3772 if (GET_CODE (op1) == SUBREG)
3773 op1 = XEXP (op1, 0);
3774 if (GET_CODE (op0) == REG
3775 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3776 && GET_CODE (op1) == CONST_INT
3777 && INTVAL (op1) >= -0x2000
3778 && INTVAL (op1) <= 0x1fff
3779 && (!aligned || (INTVAL (op1) & 15) == 0))
3781 if (GET_CODE (op0) == REG
3782 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3783 && GET_CODE (op1) == REG
3784 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
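/* For illustration: (plus reg 32) is valid for a 16-byte mode because
   32 fits the signed 10-bit lqd/stqd range and is 16-byte aligned;
   (plus reg 20) is rejected for such modes because lqd/stqd would
   silently drop the low 4 bits; (plus reg reg) is accepted regardless
   of alignment.  */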
3795 /* Like spu_legitimate_address_p, except with named addresses. */
3797 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3798 bool reg_ok_strict, addr_space_t as)
3800 if (as == ADDR_SPACE_EA)
3801 return (REG_P (x) && (GET_MODE (x) == EAmode));
3803 else if (as != ADDR_SPACE_GENERIC)
3806 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3809 /* When the address is reg + const_int, force the const_int into a register. */
3812 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3813 enum machine_mode mode ATTRIBUTE_UNUSED)
3816 /* Make sure both operands are registers. */
3817 if (GET_CODE (x) == PLUS)
3821 if (ALIGNED_SYMBOL_REF_P (op0))
3823 op0 = force_reg (Pmode, op0);
3824 mark_reg_pointer (op0, 128);
3826 else if (GET_CODE (op0) != REG)
3827 op0 = force_reg (Pmode, op0);
3828 if (ALIGNED_SYMBOL_REF_P (op1))
3830 op1 = force_reg (Pmode, op1);
3831 mark_reg_pointer (op1, 128);
3833 else if (GET_CODE (op1) != REG)
3834 op1 = force_reg (Pmode, op1);
3835 x = gen_rtx_PLUS (Pmode, op0, op1);
3840 /* Like spu_legitimize_address, except with named address support. */
3842 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3845 if (as != ADDR_SPACE_GENERIC)
3848 return spu_legitimize_address (x, oldx, mode);
3851 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3852 struct attribute_spec.handler. */
3854 spu_handle_fndecl_attribute (tree * node,
3856 tree args ATTRIBUTE_UNUSED,
3857 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3859 if (TREE_CODE (*node) != FUNCTION_DECL)
3861 warning (0, "%qE attribute only applies to functions",
3863 *no_add_attrs = true;
3869 /* Handle the "vector" attribute. */
3871 spu_handle_vector_attribute (tree * node, tree name,
3872 tree args ATTRIBUTE_UNUSED,
3873 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3875 tree type = *node, result = NULL_TREE;
3876 enum machine_mode mode;
3879 while (POINTER_TYPE_P (type)
3880 || TREE_CODE (type) == FUNCTION_TYPE
3881 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3882 type = TREE_TYPE (type);
3884 mode = TYPE_MODE (type);
3886 unsigned_p = TYPE_UNSIGNED (type);
3890 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3893 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3896 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3899 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3902 result = V4SF_type_node;
3905 result = V2DF_type_node;
3911 /* Propagate qualifiers attached to the element type
3912 onto the vector type. */
3913 if (result && result != type && TYPE_QUALS (type))
3914 result = build_qualified_type (result, TYPE_QUALS (type));
3916 *no_add_attrs = true; /* No need to hang on to the attribute. */
3919 warning (0, "%qE attribute ignored", name);
3921 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3926 /* Return nonzero if FUNC is a naked function. */
3928 spu_naked_function_p (tree func)
3932 if (TREE_CODE (func) != FUNCTION_DECL)
3935 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3936 return a != NULL_TREE;
3940 spu_initial_elimination_offset (int from, int to)
3942 int saved_regs_size = spu_saved_regs_size ();
3944 if (!current_function_is_leaf || crtl->outgoing_args_size
3945 || get_frame_size () || saved_regs_size)
3946 sp_offset = STACK_POINTER_OFFSET;
3947 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3948 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3949 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3950 return get_frame_size ();
3951 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3952 return sp_offset + crtl->outgoing_args_size
3953 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3954 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3955 return get_frame_size () + saved_regs_size + sp_offset;
3961 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3963 enum machine_mode mode = TYPE_MODE (type);
3964 int byte_size = ((mode == BLKmode)
3965 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3967 /* Make sure small structs are left justified in a register. */
3968 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3969 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3971 enum machine_mode smode;
3974 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3975 int n = byte_size / UNITS_PER_WORD;
3976 v = rtvec_alloc (nregs);
3977 for (i = 0; i < n; i++)
3979 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3980 gen_rtx_REG (TImode,
3983 GEN_INT (UNITS_PER_WORD * i));
3984 byte_size -= UNITS_PER_WORD;
3992 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3994 gen_rtx_EXPR_LIST (VOIDmode,
3995 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3996 GEN_INT (UNITS_PER_WORD * n));
3998 return gen_rtx_PARALLEL (mode, v);
4000 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
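/* For illustration, assuming 16-byte registers: a 20-byte struct is
   returned left justified in a PARALLEL holding one TImode register at
   offset 0 for the first 16 bytes, then an SImode register (chosen by
   smallest_mode_for_size) at offset 16 for the remaining 4 bytes.  */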
4004 spu_function_arg (CUMULATIVE_ARGS cum,
4005 enum machine_mode mode,
4006 tree type, int named ATTRIBUTE_UNUSED)
4010 if (cum >= MAX_REGISTER_ARGS)
4013 byte_size = ((mode == BLKmode)
4014 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4016 /* The ABI does not allow parameters to be passed partially in
4017 registers and partially on the stack. */
4018 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4021 /* Make sure small structs are left justified in a register. */
4022 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4023 && byte_size < UNITS_PER_WORD && byte_size > 0)
4025 enum machine_mode smode;
4029 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4030 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4031 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4033 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4036 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4039 /* Variable sized types are passed by reference. */
4041 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4042 enum machine_mode mode ATTRIBUTE_UNUSED,
4043 const_tree type, bool named ATTRIBUTE_UNUSED)
4045 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4051 /* Create and return the va_list datatype.
4053 On SPU, va_list is an array type equivalent to
4055 typedef struct __va_list_tag
4057 void *__args __attribute__((__aligned(16)));
4058 void *__skip __attribute__((__aligned(16)));
4062 where __args points to the arg that will be returned by the next
4063 va_arg(), and __skip points to the previous stack frame such that
4064 when __args == __skip we should advance __args by 32 bytes. */
4066 spu_build_builtin_va_list (void)
4068 tree f_args, f_skip, record, type_decl;
4071 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4074 build_decl (BUILTINS_LOCATION,
4075 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4077 f_args = build_decl (BUILTINS_LOCATION,
4078 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4079 f_skip = build_decl (BUILTINS_LOCATION,
4080 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4082 DECL_FIELD_CONTEXT (f_args) = record;
4083 DECL_ALIGN (f_args) = 128;
4084 DECL_USER_ALIGN (f_args) = 1;
4086 DECL_FIELD_CONTEXT (f_skip) = record;
4087 DECL_ALIGN (f_skip) = 128;
4088 DECL_USER_ALIGN (f_skip) = 1;
4090 TREE_CHAIN (record) = type_decl;
4091 TYPE_NAME (record) = type_decl;
4092 TYPE_FIELDS (record) = f_args;
4093 TREE_CHAIN (f_args) = f_skip;
4095 /* We know this is being padded, and we want the padding. It is an internal
4096 type so hide the warnings from the user. */
4098 warn_padded = false;
4100 layout_type (record);
4104 /* The correct type is an array type of one element. */
4105 return build_array_type (record, build_index_type (size_zero_node));
4108 /* Implement va_start by filling the va_list structure VALIST.
4109 NEXTARG points to the first anonymous stack argument.
4111 The following global variables are used to initialize
4112 the va_list structure:
4115 the CUMULATIVE_ARGS for this function
4117 crtl->args.arg_offset_rtx:
4118 holds the offset of the first anonymous stack argument
4119 (relative to the virtual arg pointer). */
4122 spu_va_start (tree valist, rtx nextarg)
4124 tree f_args, f_skip;
4127 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4128 f_skip = TREE_CHAIN (f_args);
4130 valist = build_va_arg_indirect_ref (valist);
4132 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4134 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4136 /* Find the __args area. */
4137 t = make_tree (TREE_TYPE (args), nextarg);
4138 if (crtl->args.pretend_args_size > 0)
4139 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4140 size_int (-STACK_POINTER_OFFSET));
4141 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4142 TREE_SIDE_EFFECTS (t) = 1;
4143 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4145 /* Find the __skip area. */
4146 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4147 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4148 size_int (crtl->args.pretend_args_size
4149 - STACK_POINTER_OFFSET));
4150 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4151 TREE_SIDE_EFFECTS (t) = 1;
4152 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4155 /* Gimplify va_arg by updating the va_list structure
4156 VALIST as required to retrieve an argument of type
4157 TYPE, and returning that argument.
4159 ret = va_arg(VALIST, TYPE);
4161 generates code equivalent to:
4163 paddedsize = (sizeof(TYPE) + 15) & -16;
4164 if (VALIST.__args + paddedsize > VALIST.__skip
4165 && VALIST.__args <= VALIST.__skip)
4166 addr = VALIST.__skip + 32;
4168 addr = VALIST.__args;
4169 VALIST.__args = addr + paddedsize;
4170 ret = *(TYPE *)addr;
4173 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4174 gimple_seq * post_p ATTRIBUTE_UNUSED)
4176 tree f_args, f_skip;
4178 HOST_WIDE_INT size, rsize;
4179 tree paddedsize, addr, tmp;
4180 bool pass_by_reference_p;
4182 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4183 f_skip = TREE_CHAIN (f_args);
4185 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4187 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4189 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4191 addr = create_tmp_var (ptr_type_node, "va_arg");
4193 /* If an object is dynamically sized, a pointer to it is passed
4194 instead of the object itself. */
4195 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4197 if (pass_by_reference_p)
4198 type = build_pointer_type (type);
4199 size = int_size_in_bytes (type);
4200 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4202 /* Build conditional expression to calculate addr. The expression
4203 will be gimplified later. */
4204 paddedsize = size_int (rsize);
4205 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4206 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4207 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4208 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4209 unshare_expr (skip)));
4211 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4212 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4213 size_int (32)), unshare_expr (args));
4215 gimplify_assign (addr, tmp, pre_p);
4217 /* Update VALIST.__args. */
4218 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4219 gimplify_assign (unshare_expr (args), tmp, pre_p);
4221 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4224 if (pass_by_reference_p)
4225 addr = build_va_arg_indirect_ref (addr);
4227 return build_va_arg_indirect_ref (addr);
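/* For illustration, assuming UNITS_PER_WORD == 16: for
   va_arg (ap, double), size == 8 and rsize == 16, so the generated test
   is "__args + 16 > __skip && __args <= __skip"; on that crossover the
   argument is read from __skip + 32, otherwise from __args, and __args
   then advances by 16.  */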
4230 /* Save parameter registers starting with the register that corresponds
4231 to the first unnamed parameter. If the first unnamed parameter is
4232 in the stack then save no registers. Set pretend_args_size to the
4233 amount of space needed to save the registers. */
4235 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4236 tree type, int *pretend_size, int no_rtl)
4245 /* CUM currently points to the last named argument; we want to
4246 start at the next argument. */
4247 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4249 offset = -STACK_POINTER_OFFSET;
4250 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4252 tmp = gen_frame_mem (V4SImode,
4253 plus_constant (virtual_incoming_args_rtx,
4255 emit_move_insn (tmp,
4256 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4259 *pretend_size = offset + STACK_POINTER_OFFSET;
4264 spu_conditional_register_usage (void)
4268 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4269 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4273 /* This is called any time we inspect the alignment of a register for addresses. */
4276 reg_aligned_for_addr (rtx x)
4279 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4280 return REGNO_POINTER_ALIGN (regno) >= 128;
4283 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4284 into its SYMBOL_REF_FLAGS. */
4286 spu_encode_section_info (tree decl, rtx rtl, int first)
4288 default_encode_section_info (decl, rtl, first);
4290 /* If a variable has a forced alignment to < 16 bytes, mark it with
4291 SYMBOL_FLAG_ALIGN1. */
4292 if (TREE_CODE (decl) == VAR_DECL
4293 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4294 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4297 /* Return TRUE if we are certain the mem refers to a complete object
4298 which is both 16-byte aligned and padded to a 16-byte boundary. This
4299 would make it safe to store with a single instruction.
4300 We guarantee the alignment and padding for static objects by aligning
4301 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4302 FIXME: We currently cannot guarantee this for objects on the stack
4303 because assign_parm_setup_stack calls assign_stack_local with the
4304 alignment of the parameter mode and in that case the alignment never
4305 gets adjusted by LOCAL_ALIGNMENT. */
4307 store_with_one_insn_p (rtx mem)
4309 enum machine_mode mode = GET_MODE (mem);
4310 rtx addr = XEXP (mem, 0);
4311 if (mode == BLKmode)
4313 if (GET_MODE_SIZE (mode) >= 16)
4315 /* Only static objects. */
4316 if (GET_CODE (addr) == SYMBOL_REF)
4318 /* We use the associated declaration to make sure the access is
4319 referring to the whole object.
4320 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4321 if it is necessary. Will there be cases where one exists, and
4322 the other does not? Will there be cases where both exist, but
4323 have different types? */
4324 tree decl = MEM_EXPR (mem);
4326 && TREE_CODE (decl) == VAR_DECL
4327 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4329 decl = SYMBOL_REF_DECL (addr);
4331 && TREE_CODE (decl) == VAR_DECL
4332 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4338 /* Return 1 when the address is not valid for a simple load and store as
4339 required by the '_mov*' patterns. We could make this less strict
4340 for loads, but we prefer MEMs to look the same so they are more
4341 likely to be merged. */
4343 address_needs_split (rtx mem)
4345 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4346 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4347 || !(store_with_one_insn_p (mem)
4348 || mem_is_padded_component_ref (mem))))
4354 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4355 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4356 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4358 /* MEM is known to be an __ea qualified memory access. Emit a call to
4359 fetch the PPU memory to local store, and return its address in local store. */
4363 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4367 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4368 if (!cache_fetch_dirty)
4369 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4370 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4371 2, ea_addr, EAmode, ndirty, SImode);
4376 cache_fetch = init_one_libfunc ("__cache_fetch");
4377 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4378 1, ea_addr, EAmode);
4382 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4383 dirty bit marking, inline.
4385 The cache control data structure is an array of
4387 struct __cache_tag_array
4389 unsigned int tag_lo[4];
4390 unsigned int tag_hi[4];
4391 void *data_pointer[4];
4393 vector unsigned short dirty_bits[4];
4397 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4401 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4402 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4403 rtx index_mask = gen_reg_rtx (SImode);
4404 rtx tag_arr = gen_reg_rtx (Pmode);
4405 rtx splat_mask = gen_reg_rtx (TImode);
4406 rtx splat = gen_reg_rtx (V4SImode);
4407 rtx splat_hi = NULL_RTX;
4408 rtx tag_index = gen_reg_rtx (Pmode);
4409 rtx block_off = gen_reg_rtx (SImode);
4410 rtx tag_addr = gen_reg_rtx (Pmode);
4411 rtx tag = gen_reg_rtx (V4SImode);
4412 rtx cache_tag = gen_reg_rtx (V4SImode);
4413 rtx cache_tag_hi = NULL_RTX;
4414 rtx cache_ptrs = gen_reg_rtx (TImode);
4415 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4416 rtx tag_equal = gen_reg_rtx (V4SImode);
4417 rtx tag_equal_hi = NULL_RTX;
4418 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4419 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4420 rtx eq_index = gen_reg_rtx (SImode);
4421 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4423 if (spu_ea_model != 32)
4425 splat_hi = gen_reg_rtx (V4SImode);
4426 cache_tag_hi = gen_reg_rtx (V4SImode);
4427 tag_equal_hi = gen_reg_rtx (V4SImode);
4430 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4431 emit_move_insn (tag_arr, tag_arr_sym);
4432 v = 0x0001020300010203LL;
4433 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
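/* For illustration: both halves of the TImode constant are
   0x0001020300010203, so as a shufb pattern every 4-byte slot selects
   bytes 0..3 of the first operand; the gen_shufb calls below therefore
   replicate (splat) the preferred-slot word (bytes 0-3) of the address
   into all four slots.  */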
4434 ea_addr_si = ea_addr;
4435 if (spu_ea_model != 32)
4436 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4438 /* tag_index = ea_addr & (tag_array_size - 128) */
4439 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4441 /* splat ea_addr to all 4 slots. */
4442 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4443 /* Similarly for high 32 bits of ea_addr. */
4444 if (spu_ea_model != 32)
4445 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4447 /* block_off = ea_addr & 127 */
4448 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4450 /* tag_addr = tag_arr + tag_index */
4451 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4453 /* Read cache tags. */
4454 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4455 if (spu_ea_model != 32)
4456 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4457 plus_constant (tag_addr, 16)));
4459 /* tag = ea_addr & -128 */
4460 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4462 /* Read all four cache data pointers. */
4463 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4464 plus_constant (tag_addr, 32)));
4467 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4468 if (spu_ea_model != 32)
4470 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4471 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4474 /* At most one of the tags compare equal, so tag_equal has one
4475 32-bit slot set to all 1's, with the other slots all zero.
4476 gbb picks off low bit from each byte in the 128-bit registers,
4477 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming we hit. */
4479 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4480 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4482 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4483 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4485 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4486 (rotating eq_index mod 16 bytes). */
4487 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4488 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4490 /* Add block offset to form final data address. */
4491 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4493 /* Check that we did hit. */
4494 hit_label = gen_label_rtx ();
4495 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4496 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4497 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4498 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4500 /* Say that this branch is very likely to happen. */
4501 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4503 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn));
4505 ea_load_store (mem, is_store, ea_addr, data_addr);
4506 cont_label = gen_label_rtx ();
4507 emit_jump_insn (gen_jump (cont_label));
4510 emit_label (hit_label);
4515 rtx dirty_bits = gen_reg_rtx (TImode);
4516 rtx dirty_off = gen_reg_rtx (SImode);
4517 rtx dirty_128 = gen_reg_rtx (TImode);
4518 rtx neg_block_off = gen_reg_rtx (SImode);
4520 /* Set up mask with one dirty bit per byte of the mem we are
4521 writing, starting from top bit. */
4523 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4524 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4529 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
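/* For illustration: for a 4-byte store, 128 - 4 == 124, so V
   (initialized to all ones) is shifted left by 124 & 63 == 60, and
   because 124 >= 64 it is moved into V_HI with V cleared; the TImode
   constant then has exactly its top four bits set, one dirty bit per
   byte stored.  */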
4531 /* Form index into cache dirty_bits. eq_index is one of
4532 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4533 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4534 offset to each of the four dirty_bits elements. */
4535 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4537 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4539 /* Rotate bit mask to proper bit. */
4540 emit_insn (gen_negsi2 (neg_block_off, block_off));
4541 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4542 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4544 /* Or in the new dirty bits. */
4545 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4548 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4551 emit_label (cont_label);
4555 expand_ea_mem (rtx mem, bool is_store)
4558 rtx data_addr = gen_reg_rtx (Pmode);
4561 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4562 if (optimize_size || optimize == 0)
4563 ea_load_store (mem, is_store, ea_addr, data_addr);
4565 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4567 if (ea_alias_set == -1)
4568 ea_alias_set = new_alias_set ();
4570 /* We generate a new MEM RTX to refer to the copy of the data
4571 in the cache. We do not copy memory attributes (except the
4572 alignment) from the original MEM, as they may no longer apply
4573 to the cache copy. */
4574 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4575 set_mem_alias_set (new_mem, ea_alias_set);
4576 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4582 spu_expand_mov (rtx * ops, enum machine_mode mode)
4584 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4587 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4589 rtx from = SUBREG_REG (ops[1]);
4590 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4592 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4593 && GET_MODE_CLASS (imode) == MODE_INT
4594 && subreg_lowpart_p (ops[1]));
4596 if (GET_MODE_SIZE (imode) < 4)
4598 if (imode != GET_MODE (from))
4599 from = gen_rtx_SUBREG (imode, from, 0);
4601 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4603 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4604 emit_insn (GEN_FCN (icode) (ops[0], from));
4607 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4611 /* At least one of the operands needs to be a register. */
4612 if ((reload_in_progress | reload_completed) == 0
4613 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4615 rtx temp = force_reg (mode, ops[1]);
4616 emit_move_insn (ops[0], temp);
4619 if (reload_in_progress || reload_completed)
4621 if (CONSTANT_P (ops[1]))
4622 return spu_split_immediate (ops);
4626 /* Catch the SImode immediates greater than 0x7fffffff, and sign extend them. */
4628 if (GET_CODE (ops[1]) == CONST_INT)
4630 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4631 if (val != INTVAL (ops[1]))
4633 emit_move_insn (ops[0], GEN_INT (val));
4639 if (MEM_ADDR_SPACE (ops[0]))
4640 ops[0] = expand_ea_mem (ops[0], true);
4641 return spu_split_store (ops);
4645 if (MEM_ADDR_SPACE (ops[1]))
4646 ops[1] = expand_ea_mem (ops[1], false);
4647 return spu_split_load (ops);
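4652 /* Extract the scalar value from the preferred slot of TImode register SRC into scalar DST. */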
4653 static void
4654 spu_convert_move (rtx dst, rtx src)
4655 {
4656 enum machine_mode mode = GET_MODE (dst);
4657 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4658 rtx reg;
4659 gcc_assert (GET_MODE (src) == TImode);
4660 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4661 emit_insn (gen_rtx_SET (VOIDmode, reg,
4662 gen_rtx_TRUNCATE (int_mode,
4663 gen_rtx_LSHIFTRT (TImode, src,
4664 GEN_INT (int_mode == DImode ? 64 : 96)))));
4665 if (int_mode != mode)
4666 {
4667 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4668 emit_move_insn (dst, reg);
4669 }
4670 }
4672 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4673 the address from SRC and SRC+16. Return a REG or CONST_INT that
4674 specifies how many bytes to rotate the loaded registers, plus any
4675 extra from EXTRA_ROTQBY. The address and rotate amounts are
4676 normalized to improve merging of loads and rotate computations. */
4677 static rtx
4678 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4679 {
4680 rtx addr = XEXP (src, 0);
4681 rtx p0, p1, rot, addr0, addr1;
4682 int rot_amt;
4684 rot = 0;
4685 rot_amt = 0;
4687 if (MEM_ALIGN (src) >= 128)
4688 /* Address is already aligned; simply perform a TImode load. */ ;
4689 else if (GET_CODE (addr) == PLUS)
4690 {
4691 /* 8 cases:
4692 aligned reg + aligned reg => lqx
4693 aligned reg + unaligned reg => lqx, rotqby
4694 aligned reg + aligned const => lqd
4695 aligned reg + unaligned const => lqd, rotqbyi
4696 unaligned reg + aligned reg => lqx, rotqby
4697 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4698 unaligned reg + aligned const => lqd, rotqby
4699 unaligned reg + unaligned const -> not allowed by legitimate address
4700 */
4701 p0 = XEXP (addr, 0);
4702 p1 = XEXP (addr, 1);
4703 if (!reg_aligned_for_addr (p0))
4704 {
4705 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4706 {
4707 rot = gen_reg_rtx (SImode);
4708 emit_insn (gen_addsi3 (rot, p0, p1));
4709 }
4710 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4711 {
4712 if (INTVAL (p1) > 0
4713 && REG_POINTER (p0)
4714 && INTVAL (p1) * BITS_PER_UNIT
4715 < REGNO_POINTER_ALIGN (REGNO (p0)))
4716 {
4717 rot = gen_reg_rtx (SImode);
4718 emit_insn (gen_addsi3 (rot, p0, p1));
4719 addr = p0;
4720 }
4721 else
4722 {
4723 rtx x = gen_reg_rtx (SImode);
4724 emit_move_insn (x, p1);
4725 if (!spu_arith_operand (p1, SImode))
4726 p1 = x;
4727 rot = gen_reg_rtx (SImode);
4728 emit_insn (gen_addsi3 (rot, p0, p1));
4729 addr = gen_rtx_PLUS (Pmode, p0, x);
4730 }
4731 }
4732 else
4733 rot = p0;
4734 }
4735 else
4736 {
4737 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4738 {
4739 rot_amt = INTVAL (p1) & 15;
4740 if (INTVAL (p1) & -16)
4741 {
4742 p1 = GEN_INT (INTVAL (p1) & -16);
4743 addr = gen_rtx_PLUS (SImode, p0, p1);
4744 }
4745 else
4746 addr = p0;
4747 }
4748 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4749 rot = p1;
4750 }
4751 }
4752 else if (REG_P (addr))
4753 {
4754 if (!reg_aligned_for_addr (addr))
4755 rot = addr;
4756 }
4757 else if (GET_CODE (addr) == CONST)
4758 {
4759 if (GET_CODE (XEXP (addr, 0)) == PLUS
4760 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4761 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4762 {
4763 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4764 if (rot_amt & -16)
4765 addr = gen_rtx_CONST (Pmode,
4766 gen_rtx_PLUS (Pmode,
4767 XEXP (XEXP (addr, 0), 0),
4768 GEN_INT (rot_amt & -16)));
4769 else
4770 addr = XEXP (XEXP (addr, 0), 0);
4771 }
4772 else
4773 {
4774 rot = gen_reg_rtx (Pmode);
4775 emit_move_insn (rot, addr);
4776 }
4777 }
4778 else if (GET_CODE (addr) == CONST_INT)
4779 {
4780 rot_amt = INTVAL (addr);
4781 addr = GEN_INT (rot_amt & -16);
4782 }
4783 else if (!ALIGNED_SYMBOL_REF_P (addr))
4784 {
4785 rot = gen_reg_rtx (Pmode);
4786 emit_move_insn (rot, addr);
4787 }
4789 rot_amt += extra_rotby;
4790 rot_amt &= 15;
4793 if (rot && rot_amt)
4794 {
4795 rtx x = gen_reg_rtx (SImode);
4796 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4797 rot = x;
4798 rot_amt = 0;
4799 }
4800 if (!rot && rot_amt)
4801 rot = GEN_INT (rot_amt);
4804 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4805 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4807 if (dst1)
4808 {
4809 addr1 = plus_constant (copy_rtx (addr), 16);
4810 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4811 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4812 }
4814 return rot;
4815 }
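4816 /* Split a load narrower than a quadword into a TImode load plus a rotate. */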
4817 int
4818 spu_split_load (rtx * ops)
4819 {
4820 enum machine_mode mode = GET_MODE (ops[0]);
4821 rtx addr, load, rot;
4822 int rot_amt;
4824 if (GET_MODE_SIZE (mode) >= 16)
4825 return 0;
4827 addr = XEXP (ops[1], 0);
4828 gcc_assert (GET_CODE (addr) != AND);
4830 if (!address_needs_split (ops[1]))
4831 {
4832 ops[1] = change_address (ops[1], TImode, addr);
4833 load = gen_reg_rtx (TImode);
4834 emit_insn (gen__movti (load, ops[1]));
4835 spu_convert_move (ops[0], load);
4836 return 1;
4837 }
4839 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4841 load = gen_reg_rtx (TImode);
4842 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4844 if (rot)
4845 emit_insn (gen_rotqby_ti (load, load, rot));
4847 spu_convert_move (ops[0], load);
4848 return 1;
4849 }
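4850 /* Split a store narrower than a quadword into a load, shuffle and store. */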
4851 int
4852 spu_split_store (rtx * ops)
4853 {
4854 enum machine_mode mode = GET_MODE (ops[0]);
4855 rtx reg;
4856 rtx addr, p0, p1, p1_lo, smem;
4857 int aform;
4858 int scalar;
4860 if (GET_MODE_SIZE (mode) >= 16)
4861 return 0;
4863 addr = XEXP (ops[0], 0);
4864 gcc_assert (GET_CODE (addr) != AND);
4866 if (!address_needs_split (ops[0]))
4867 {
4868 reg = gen_reg_rtx (TImode);
4869 emit_insn (gen_spu_convert (reg, ops[1]));
4870 ops[0] = change_address (ops[0], TImode, addr);
4871 emit_move_insn (ops[0], reg);
4872 return 1;
4873 }
4875 if (GET_CODE (addr) == PLUS)
4876 {
4877 /* 8 cases:
4878 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4879 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4880 aligned reg + aligned const => lqd, c?d, shuf, stqx
4881 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4882 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4883 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4884 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4885 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4886 */
4887 aform = 0;
4888 p0 = XEXP (addr, 0);
4889 p1 = p1_lo = XEXP (addr, 1);
4890 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4891 {
4892 p1_lo = GEN_INT (INTVAL (p1) & 15);
4893 if (reg_aligned_for_addr (p0))
4894 {
4895 p1 = GEN_INT (INTVAL (p1) & -16);
4896 if (p1 == const0_rtx)
4897 addr = p0;
4898 else
4899 addr = gen_rtx_PLUS (SImode, p0, p1);
4900 }
4901 else
4902 {
4903 rtx x = gen_reg_rtx (SImode);
4904 emit_move_insn (x, p1);
4905 addr = gen_rtx_PLUS (SImode, p0, x);
4906 }
4907 }
4908 }
4909 else if (REG_P (addr))
4910 {
4911 aform = 0;
4912 p0 = addr;
4913 p1 = p1_lo = const0_rtx;
4914 }
4915 else
4916 {
4917 aform = 1;
4918 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4919 p1 = 0; /* aform doesn't use p1 */
4920 p1_lo = addr;
4921 if (ALIGNED_SYMBOL_REF_P (addr))
4922 p1_lo = const0_rtx;
4923 else if (GET_CODE (addr) == CONST
4924 && GET_CODE (XEXP (addr, 0)) == PLUS
4925 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4926 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4928 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4929 if ((v & -16) != 0)
4930 addr = gen_rtx_CONST (Pmode,
4931 gen_rtx_PLUS (Pmode,
4932 XEXP (XEXP (addr, 0), 0),
4933 GEN_INT (v & -16)));
4934 else
4935 addr = XEXP (XEXP (addr, 0), 0);
4936 p1_lo = GEN_INT (v & 15);
4937 }
4938 else if (GET_CODE (addr) == CONST_INT)
4939 {
4940 p1_lo = GEN_INT (INTVAL (addr) & 15);
4941 addr = GEN_INT (INTVAL (addr) & -16);
4942 }
4943 else
4944 {
4945 p1_lo = gen_reg_rtx (SImode);
4946 emit_move_insn (p1_lo, addr);
4947 }
4948 }
4950 reg = gen_reg_rtx (TImode);
4952 scalar = store_with_one_insn_p (ops[0]);
4953 if (!scalar)
4954 {
4955 /* We could copy the flags from the ops[0] MEM to the load emitted
4956 here, but we don't, because we want this load to be optimized
4957 away if possible, and copying the flags would prevent that in
4958 certain cases, e.g. consider the volatile flag. */
4960 rtx pat = gen_reg_rtx (TImode);
4961 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4962 set_mem_alias_set (lmem, 0);
4963 emit_insn (gen_movti (reg, lmem));
4965 if (!p0 || reg_aligned_for_addr (p0))
4966 p0 = stack_pointer_rtx;
4967 if (!p1_lo)
4968 p1_lo = const0_rtx;
4970 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4971 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4972 }
4973 else /* scalar */
4974 {
4975 if (GET_CODE (ops[1]) == REG)
4976 emit_insn (gen_spu_convert (reg, ops[1]));
4977 else if (GET_CODE (ops[1]) == SUBREG)
4978 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4979 else
4980 abort ();
4981 }
4983 if (GET_MODE_SIZE (mode) < 4 && scalar)
4984 emit_insn (gen_ashlti3
4985 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4987 smem = change_address (ops[0], TImode, copy_rtx (addr));
4988 /* We can't use the previous alias set because the memory has changed
4989 size and can potentially overlap objects of other types. */
4990 set_mem_alias_set (smem, 0);
4992 emit_insn (gen_movti (smem, reg));
4993 return 1;
4994 }
4996 /* Return TRUE if X is a MEM which is a struct member reference
4997 and the member can safely be loaded and stored with a single
4998 instruction because it is padded. */
4999 static int
5000 mem_is_padded_component_ref (rtx x)
5001 {
5002 tree t = MEM_EXPR (x);
5003 tree r;
5004 if (!t || TREE_CODE (t) != COMPONENT_REF)
5005 return 0;
5006 t = TREE_OPERAND (t, 1);
5007 if (!t || TREE_CODE (t) != FIELD_DECL
5008 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5009 return 0;
5010 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5011 r = DECL_FIELD_CONTEXT (t);
5012 if (!r || TREE_CODE (r) != RECORD_TYPE)
5013 return 0;
5014 /* Make sure they are the same mode. */
5015 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5016 return 0;
5017 /* If there are no following fields then the field alignment assures
5018 the structure is padded to the alignment, which means this field is
5019 padded too. */
5020 if (TREE_CHAIN (t) == 0)
5021 return 1;
5022 /* If the following field is also aligned then this field will be
5023 padded. */
5024 t = TREE_CHAIN (t);
5025 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5026 return 1;
5027 return 0;
5028 }
5030 /* Parse the -mfixed-range= option string. */
5031 static void
5032 fix_range (const char *const_str)
5033 {
5034 int i, first, last;
5035 char *str, *dash, *comma;
5037 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5038 REG2 are either register names or register numbers. The effect
5039 of this option is to mark the registers in the range from REG1 to
5040 REG2 as ``fixed'' so they won't be used by the compiler. */
5042 i = strlen (const_str);
5043 str = (char *) alloca (i + 1);
5044 memcpy (str, const_str, i + 1);
5046 while (1)
5047 {
5048 dash = strchr (str, '-');
5049 if (!dash)
5050 {
5051 warning (0, "value of -mfixed-range must have form REG1-REG2");
5052 return;
5053 }
5054 *dash = '\0';
5055 comma = strchr (dash + 1, ',');
5056 if (comma)
5057 *comma = '\0';
5059 first = decode_reg_name (str);
5060 if (first < 0)
5061 {
5062 warning (0, "unknown register name: %s", str);
5063 return;
5064 }
5066 last = decode_reg_name (dash + 1);
5067 if (last < 0)
5068 {
5069 warning (0, "unknown register name: %s", dash + 1);
5070 return;
5071 }
5073 *dash = '-';
5075 if (first > last)
5076 {
5077 warning (0, "%s-%s is an empty range", str, dash + 1);
5078 return;
5079 }
5081 for (i = first; i <= last; ++i)
5082 fixed_regs[i] = call_used_regs[i] = 1;
5084 if (!comma)
5085 break;
5086 *comma = ',';
5087 str = comma + 1;
5088 }
5089 }
5092 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5093 can be generated using the fsmbi instruction. */
5094 int
5095 fsmbi_const_p (rtx x)
5096 {
5097 if (CONSTANT_P (x))
5098 {
5099 /* We can always choose TImode for CONST_INT because the high bits
5100 of an SImode value will always be all 1s, i.e., valid for fsmbi. */
5101 enum immediate_class c = classify_immediate (x, TImode);
5102 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5103 }
5104 return 0;
5105 }
5107 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5108 can be generated using the cbd, chd, cwd or cdd instruction. */
5109 int
5110 cpat_const_p (rtx x, enum machine_mode mode)
5111 {
5112 if (CONSTANT_P (x))
5113 {
5114 enum immediate_class c = classify_immediate (x, mode);
5115 return c == IC_CPAT;
5116 }
5117 return 0;
5118 }
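5119 /* Build the constant shuffle pattern that a cbd/chd/cwd/cdd would produce, or 0 if unknown at compile time. */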
5120 rtx
5121 gen_cpat_const (rtx * ops)
5122 {
5123 unsigned char dst[16];
5124 int i, offset, shift, isize;
5125 if (GET_CODE (ops[3]) != CONST_INT
5126 || GET_CODE (ops[2]) != CONST_INT
5127 || (GET_CODE (ops[1]) != CONST_INT
5128 && GET_CODE (ops[1]) != REG))
5129 return 0;
5130 if (GET_CODE (ops[1]) == REG
5131 && (!REG_POINTER (ops[1])
5132 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5133 return 0;
5135 for (i = 0; i < 16; i++)
5136 dst[i] = i + 16;
5137 isize = INTVAL (ops[3]);
5138 if (isize == 1)
5139 shift = 3;
5140 else if (isize == 2)
5141 shift = 2;
5142 else
5143 shift = 0;
5144 offset = (INTVAL (ops[2]) +
5145 (GET_CODE (ops[1]) ==
5146 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5147 for (i = 0; i < isize; i++)
5148 dst[offset + i] = i + shift;
5149 return array_to_constant (TImode, dst);
5150 }
5152 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5153 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5154 than 16 bytes, the value is repeated across the rest of the array. */
5155 void
5156 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5157 {
5158 HOST_WIDE_INT val;
5159 int i, j, first;
5161 memset (arr, 0, 16);
5162 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5163 if (GET_CODE (x) == CONST_INT
5164 || (GET_CODE (x) == CONST_DOUBLE
5165 && (mode == SFmode || mode == DFmode)))
5167 gcc_assert (mode != VOIDmode && mode != BLKmode);
5169 if (GET_CODE (x) == CONST_DOUBLE)
5170 val = const_double_to_hwint (x);
5171 else
5172 val = INTVAL (x);
5173 first = GET_MODE_SIZE (mode) - 1;
5174 for (i = first; i >= 0; i--)
5175 {
5176 arr[i] = val & 0xff;
5177 val >>= 8;
5178 }
5179 /* Splat the constant across the whole array. */
5180 for (j = 0, i = first + 1; i < 16; i++)
5181 {
5182 arr[i] = arr[j];
5183 j = (j == first) ? 0 : j + 1;
5184 }
5185 }
5186 else if (GET_CODE (x) == CONST_DOUBLE)
5187 {
5188 val = CONST_DOUBLE_LOW (x);
5189 for (i = 15; i >= 8; i--)
5190 {
5191 arr[i] = val & 0xff;
5192 val >>= 8;
5193 }
5194 val = CONST_DOUBLE_HIGH (x);
5195 for (i = 7; i >= 0; i--)
5196 {
5197 arr[i] = val & 0xff;
5198 val >>= 8;
5199 }
5200 }
5201 else if (GET_CODE (x) == CONST_VECTOR)
5202 {
5203 int units;
5204 rtx elt;
5205 mode = GET_MODE_INNER (mode);
5206 units = CONST_VECTOR_NUNITS (x);
5207 for (i = 0; i < units; i++)
5208 {
5209 elt = CONST_VECTOR_ELT (x, i);
5210 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5211 {
5212 if (GET_CODE (elt) == CONST_DOUBLE)
5213 val = const_double_to_hwint (elt);
5214 else
5215 val = INTVAL (elt);
5216 first = GET_MODE_SIZE (mode) - 1;
5217 if (first + i * GET_MODE_SIZE (mode) > 16)
5218 abort ();
5219 for (j = first; j >= 0; j--)
5220 {
5221 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5222 val >>= 8;
5223 }
5224 }
5225 }
5226 }
5227 else
5228 gcc_unreachable ();
5229 }
5231 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5232 smaller than 16 bytes, use the bytes that would represent that value
5233 in a register, e.g., for QImode return the value of arr[3]. */
5234 rtx
5235 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5236 {
5237 enum machine_mode inner_mode;
5238 rtvec v;
5239 int units, size, i, j, k;
5240 HOST_WIDE_INT val;
5242 if (GET_MODE_CLASS (mode) == MODE_INT
5243 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5244 {
5245 j = GET_MODE_SIZE (mode);
5246 i = j < 4 ? 4 - j : 0;
5247 for (val = 0; i < j; i++)
5248 val = (val << 8) | arr[i];
5249 val = trunc_int_for_mode (val, mode);
5250 return GEN_INT (val);
5251 }
5253 if (mode == TImode)
5254 {
5255 HOST_WIDE_INT high;
5256 for (i = high = 0; i < 8; i++)
5257 high = (high << 8) | arr[i];
5258 for (i = 8, val = 0; i < 16; i++)
5259 val = (val << 8) | arr[i];
5260 return immed_double_const (val, high, TImode);
5261 }
5262 if (mode == SFmode)
5263 {
5264 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5265 val = trunc_int_for_mode (val, SImode);
5266 return hwint_to_const_double (SFmode, val);
5267 }
5268 if (mode == DFmode)
5269 {
5270 for (i = 0, val = 0; i < 8; i++)
5271 val = (val << 8) | arr[i];
5272 return hwint_to_const_double (DFmode, val);
5273 }
5275 if (!VECTOR_MODE_P (mode))
5276 abort ();
5278 units = GET_MODE_NUNITS (mode);
5279 size = GET_MODE_UNIT_SIZE (mode);
5280 inner_mode = GET_MODE_INNER (mode);
5281 v = rtvec_alloc (units);
5283 for (k = i = 0; i < units; ++i)
5284 {
5285 val = 0;
5286 for (j = 0; j < size; j++, k++)
5287 val = (val << 8) | arr[k];
5289 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5290 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5291 else
5292 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5293 }
5295 return gen_rtx_CONST_VECTOR (mode, v);
5296 }
5300 static void
5301 reloc_diagnostic (rtx x)
5302 {
5303 tree decl = 0;
5304 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5305 return;
5307 if (GET_CODE (x) == SYMBOL_REF)
5308 decl = SYMBOL_REF_DECL (x);
5309 else if (GET_CODE (x) == CONST
5310 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5311 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5313 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5314 if (decl && !DECL_P (decl))
5315 decl = 0;
5317 /* The decl could be a string constant. */
5318 if (decl && DECL_P (decl))
5319 {
5320 location_t loc;
5321 /* We use last_assemble_variable_decl to get line information. It's
5322 not always going to be right and might not even be close, but it
5323 will be right for the more common cases. */
5324 if (!last_assemble_variable_decl || in_section == ctors_section)
5325 loc = DECL_SOURCE_LOCATION (decl);
5326 else
5327 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5329 if (TARGET_WARN_RELOC)
5330 warning_at (loc, 0,
5331 "creating run-time relocation for %qD", decl);
5332 else
5333 error_at (loc,
5334 "creating run-time relocation for %qD", decl);
5335 }
5336 else
5337 {
5338 if (TARGET_WARN_RELOC)
5339 warning_at (input_location, 0, "creating run-time relocation");
5340 else
5341 error_at (input_location, "creating run-time relocation");
5342 }
5343 }
5345 /* Hook into assemble_integer so we can generate an error for run-time
5346 relocations. The SPU ABI disallows them. */
5347 static bool
5348 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5349 {
5350 /* By default run-time relocations aren't supported, but we allow them
5351 in case users support it in their own run-time loader. And we provide
5352 a warning for those users that don't. */
5353 if ((GET_CODE (x) == SYMBOL_REF)
5354 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5355 reloc_diagnostic (x);
5357 return default_assemble_integer (x, size, aligned_p);
5358 }
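5359 /* Output the assembler directive that makes NAME globally visible. */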
5360 static void
5361 spu_asm_globalize_label (FILE * file, const char *name)
5362 {
5363 fputs ("\t.global\t", file);
5364 assemble_name (file, name);
5365 fputs ("\n", file);
5366 }
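5367 /* Compute rtx costs for the TARGET_RTX_COSTS hook; returns true when *total has been set. */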
5368 static bool
5369 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5370 bool speed ATTRIBUTE_UNUSED)
5371 {
5372 enum machine_mode mode = GET_MODE (x);
5373 int cost = COSTS_N_INSNS (2);
5375 /* Folding to a CONST_VECTOR will use extra space but there might
5376 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5377 only if it allows us to fold away multiple insns. Changing the cost
5378 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5379 because this cost will only be compared against a single insn.
5380 if (code == CONST_VECTOR)
5381 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5382 */
5384 /* Use defaults for float operations. Not accurate but good enough. */
5385 if (mode == DFmode)
5386 {
5387 *total = COSTS_N_INSNS (13);
5388 return true;
5389 }
5390 if (mode == SFmode)
5391 {
5392 *total = COSTS_N_INSNS (6);
5393 return true;
5394 }
5395 switch (code)
5396 {
5397 case CONST_INT:
5398 if (satisfies_constraint_K (x))
5399 *total = 0;
5400 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5401 *total = COSTS_N_INSNS (1);
5402 else
5403 *total = COSTS_N_INSNS (3);
5404 return true;
5406 case CONST:
5407 *total = COSTS_N_INSNS (3);
5408 return true;
5410 case LABEL_REF:
5411 case SYMBOL_REF:
5412 *total = COSTS_N_INSNS (0);
5413 return true;
5415 case CONST_DOUBLE:
5416 *total = COSTS_N_INSNS (5);
5417 return true;
5419 case FLOAT_EXTEND:
5420 case FLOAT_TRUNCATE:
5421 case FLOAT:
5422 case UNSIGNED_FLOAT:
5423 case FIX:
5424 case UNSIGNED_FIX:
5425 *total = COSTS_N_INSNS (7);
5426 return true;
5428 case PLUS:
5429 if (mode == TImode)
5430 {
5431 *total = COSTS_N_INSNS (9);
5432 return true;
5433 }
5434 break;
5436 case MULT:
5437 cost =
5438 GET_CODE (XEXP (x, 0)) ==
5439 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5440 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5441 {
5442 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5443 {
5444 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5445 cost = COSTS_N_INSNS (14);
5446 if ((val & 0xffff) == 0)
5447 cost = COSTS_N_INSNS (9);
5448 else if (val > 0 && val < 0x10000)
5449 cost = COSTS_N_INSNS (11);
5450 }
5451 }
5452 *total = cost;
5453 return true;
5454 case DIV:
5455 case UDIV:
5456 case MOD:
5457 case UMOD:
5458 *total = COSTS_N_INSNS (20);
5459 return true;
5460 case ROTATE:
5461 case ROTATERT:
5462 case ASHIFT:
5463 case ASHIFTRT:
5464 case LSHIFTRT:
5465 *total = COSTS_N_INSNS (4);
5466 return true;
5467 case UNSPEC:
5468 if (XINT (x, 1) == UNSPEC_CONVERT)
5469 *total = COSTS_N_INSNS (0);
5470 else
5471 *total = COSTS_N_INSNS (4);
5472 return true;
5473 }
5474 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5475 if (GET_MODE_CLASS (mode) == MODE_INT
5476 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5477 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5478 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5480 *total = cost;
5481 return true;
5482 }
5483 static enum machine_mode
5484 spu_unwind_word_mode (void)
5485 {
5486 return SImode;
5487 }
5489 /* Decide whether we can make a sibling call to a function. DECL is the
5490 declaration of the function being targeted by the call and EXP is the
5491 CALL_EXPR representing the call. */
5492 static bool
5493 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5494 {
5495 return decl && !TARGET_LARGE_MEM;
5496 }
5498 /* We need to correctly update the back chain pointer and the Available
5499 Stack Size (which is in the second slot of the sp register.) */
5500 void
5501 spu_allocate_stack (rtx op0, rtx op1)
5502 {
5503 HOST_WIDE_INT v;
5504 rtx chain = gen_reg_rtx (V4SImode);
5505 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5506 rtx sp = gen_reg_rtx (V4SImode);
5507 rtx splatted = gen_reg_rtx (V4SImode);
5508 rtx pat = gen_reg_rtx (TImode);
5510 /* copy the back chain so we can save it back again. */
5511 emit_move_insn (chain, stack_bot);
5513 op1 = force_reg (SImode, op1);
5515 v = 0x1020300010203ll;
5516 emit_move_insn (pat, immed_double_const (v, v, TImode));
5517 emit_insn (gen_shufb (splatted, op1, op1, pat));
5519 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5520 emit_insn (gen_subv4si3 (sp, sp, splatted));
5522 if (flag_stack_check)
5523 {
5524 rtx avail = gen_reg_rtx(SImode);
5525 rtx result = gen_reg_rtx(SImode);
5526 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5527 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5528 emit_insn (gen_spu_heq (result, GEN_INT (0)));
5529 }
5531 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5533 emit_move_insn (stack_bot, chain);
5535 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5536 }
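5537 /* Restore sp and the back chain from the save area at OP1, recomputing the Available Stack Size words. */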
5538 void
5539 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5540 {
5541 static unsigned char arr[16] =
5542 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5543 rtx temp = gen_reg_rtx (SImode);
5544 rtx temp2 = gen_reg_rtx (SImode);
5545 rtx temp3 = gen_reg_rtx (V4SImode);
5546 rtx temp4 = gen_reg_rtx (V4SImode);
5547 rtx pat = gen_reg_rtx (TImode);
5548 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5550 /* Restore the backchain from the first word, sp from the second. */
5551 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5552 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5554 emit_move_insn (pat, array_to_constant (TImode, arr));
5556 /* Compute Available Stack Size for sp */
5557 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5558 emit_insn (gen_shufb (temp3, temp, temp, pat));
5560 /* Compute Available Stack Size for back chain */
5561 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5562 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5563 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5565 emit_insn (gen_addv4si3 (sp, sp, temp3));
5566 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5567 }
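5568 /* Register library routines for operations with no inline expansion, mainly 64-bit and 128-bit integer arithmetic. */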
5569 static void
5570 spu_init_libfuncs (void)
5571 {
5572 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5573 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5574 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5575 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5576 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5577 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5578 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5579 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5580 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5581 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5582 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5584 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5585 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5587 set_optab_libfunc (smul_optab, TImode, "__multi3");
5588 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5589 set_optab_libfunc (smod_optab, TImode, "__modti3");
5590 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5591 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5592 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5593 }
5595 /* Make a subreg, stripping any existing subreg. We could possibly just
5596 call simplify_subreg, but in this case we know what we want. */
5597 rtx
5598 spu_gen_subreg (enum machine_mode mode, rtx x)
5599 {
5600 if (GET_CODE (x) == SUBREG)
5601 x = SUBREG_REG (x);
5602 if (GET_MODE (x) == mode)
5603 return x;
5604 return gen_rtx_SUBREG (mode, x, 0);
5605 }
5607 static bool
5608 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5609 {
5610 return (TYPE_MODE (type) == BLKmode
5611 && ((type) == 0
5612 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5613 || int_size_in_bytes (type) >
5614 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5615 }
5617 /* Create the built-in types and functions */
5619 enum spu_function_code
5620 {
5621 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5622 #include "spu-builtins.def"
5623 #undef DEF_BUILTIN
5624 NUM_SPU_BUILTINS
5625 };
5627 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5629 struct spu_builtin_description spu_builtins[] = {
5630 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5631 {fcode, icode, name, type, params, NULL_TREE},
5632 #include "spu-builtins.def"
5633 #undef DEF_BUILTIN
5634 };
5636 /* Returns the SPU builtin decl for CODE. */
5638 static tree
5639 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5640 {
5641 if (code >= NUM_SPU_BUILTINS)
5642 return error_mark_node;
5644 return spu_builtins[code].fndecl;
5645 }
5648 static void
5649 spu_init_builtins (void)
5650 {
5651 struct spu_builtin_description *d;
5652 unsigned int i;
5654 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5655 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5656 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5657 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5658 V4SF_type_node = build_vector_type (float_type_node, 4);
5659 V2DF_type_node = build_vector_type (double_type_node, 2);
5661 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5662 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5663 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5664 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5666 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5668 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5669 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5670 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5671 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5672 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5673 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5674 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5675 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5676 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5677 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5678 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5679 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5681 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5682 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5683 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5684 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5685 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5686 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5687 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5688 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5690 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5691 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5693 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5695 spu_builtin_types[SPU_BTI_PTR] =
5696 build_pointer_type (build_qualified_type
5697 (void_type_node,
5698 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5700 /* For each builtin we build a new prototype. The tree code will make
5701 sure nodes are shared. */
5702 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5703 {
5704 tree p;
5705 char name[64]; /* build_function will make a copy. */
5706 int parm;
5708 if (d->name == 0)
5709 continue;
5711 /* Find last parm. */
5712 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5713 ;
5715 p = void_list_node;
5716 while (parm > 1)
5717 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5719 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5721 sprintf (name, "__builtin_%s", d->name);
5722 d->fndecl =
5723 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5724 NULL, NULL_TREE);
5725 if (d->fcode == SPU_MASK_FOR_LOAD)
5726 TREE_READONLY (d->fndecl) = 1;
5728 /* These builtins don't throw. */
5729 TREE_NOTHROW (d->fndecl) = 1;
5730 }
5731 }
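5732 /* Restore sp from OP1, keeping the Available Stack Size word in sync with the new stack pointer. */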
5733 void
5734 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5735 {
5736 static unsigned char arr[16] =
5737 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5739 rtx temp = gen_reg_rtx (Pmode);
5740 rtx temp2 = gen_reg_rtx (V4SImode);
5741 rtx temp3 = gen_reg_rtx (V4SImode);
5742 rtx pat = gen_reg_rtx (TImode);
5743 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5745 emit_move_insn (pat, array_to_constant (TImode, arr));
5747 /* Restore the sp. */
5748 emit_move_insn (temp, op1);
5749 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5751 /* Compute available stack size for sp. */
5752 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5753 emit_insn (gen_shufb (temp3, temp, temp, pat));
5755 emit_insn (gen_addv4si3 (sp, sp, temp3));
5756 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5757 }
5759 int
5760 spu_safe_dma (HOST_WIDE_INT channel)
5761 {
5762 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5763 }
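5764 /* Expand spu_splats: replicate scalar ops[1] into every element of vector ops[0]. */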
5765 void
5766 spu_builtin_splats (rtx ops[])
5767 {
5768 enum machine_mode mode = GET_MODE (ops[0]);
5769 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5771 unsigned char arr[16];
5772 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5773 emit_move_insn (ops[0], array_to_constant (mode, arr));
5774 }
5775 else
5776 {
5777 rtx reg = gen_reg_rtx (TImode);
5778 rtx shuf;
5779 if (GET_CODE (ops[1]) != REG
5780 && GET_CODE (ops[1]) != SUBREG)
5781 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5782 switch (mode)
5783 {
5784 case V2DImode:
5785 case V2DFmode:
5786 shuf =
5787 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5788 TImode);
5789 break;
5790 case V4SImode:
5791 case V4SFmode:
5792 shuf =
5793 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5794 TImode);
5795 break;
5796 case V8HImode:
5797 shuf =
5798 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5799 TImode);
5800 break;
5801 case V16QImode:
5802 shuf =
5803 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5804 TImode);
5805 break;
5806 default:
5807 abort ();
5808 }
5809 emit_move_insn (reg, shuf);
5810 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5811 }
5812 }
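5813 /* Expand spu_extract: move element ops[2] of vector ops[1] into scalar ops[0]. */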
5814 void
5815 spu_builtin_extract (rtx ops[])
5816 {
5817 enum machine_mode mode;
5818 rtx rot, from, tmp;
5820 mode = GET_MODE (ops[1]);
5822 if (GET_CODE (ops[2]) == CONST_INT)
5823 {
5824 switch (mode)
5825 {
5826 case V16QImode:
5827 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5828 break;
5829 case V8HImode:
5830 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5831 break;
5832 case V4SFmode:
5833 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5834 break;
5835 case V4SImode:
5836 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5837 break;
5838 case V2DImode:
5839 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5840 break;
5841 case V2DFmode:
5842 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5843 break;
5844 default:
5845 abort ();
5846 }
5847 return;
5848 }
5850 from = spu_gen_subreg (TImode, ops[1]);
5851 rot = gen_reg_rtx (TImode);
5852 tmp = gen_reg_rtx (SImode);
5854 switch (mode)
5855 {
5856 case V16QImode:
5857 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5858 break;
5859 case V8HImode:
5860 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5861 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5862 break;
5863 case V4SFmode:
5864 case V4SImode:
5865 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5866 break;
5867 case V2DImode:
5868 case V2DFmode:
5869 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5870 break;
5871 default:
5872 abort ();
5873 }
5874 emit_insn (gen_rotqby_ti (rot, from, tmp));
5876 emit_insn (gen_spu_convert (ops[0], rot));
5877 }
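5878 /* Expand spu_insert: place scalar ops[2] into element ops[3] of vector ops[1], producing ops[0]. */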
5879 void
5880 spu_builtin_insert (rtx ops[])
5881 {
5882 enum machine_mode mode = GET_MODE (ops[0]);
5883 enum machine_mode imode = GET_MODE_INNER (mode);
5884 rtx mask = gen_reg_rtx (TImode);
5885 rtx offset;
5887 if (GET_CODE (ops[3]) == CONST_INT)
5888 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5890 {
5891 offset = gen_reg_rtx (SImode);
5892 emit_insn (gen_mulsi3
5893 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5894 }
5895 emit_insn (gen_cpat
5896 (mask, stack_pointer_rtx, offset,
5897 GEN_INT (GET_MODE_SIZE (imode))));
5898 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5899 }
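5900 /* Expand spu_promote: rotate element ops[2] of vector ops[1] into the preferred slot of ops[0]. */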
5901 void
5902 spu_builtin_promote (rtx ops[])
5903 {
5904 enum machine_mode mode, imode;
5905 rtx rot, from, offset;
5906 HOST_WIDE_INT pos;
5908 mode = GET_MODE (ops[0]);
5909 imode = GET_MODE_INNER (mode);
5911 from = gen_reg_rtx (TImode);
5912 rot = spu_gen_subreg (TImode, ops[0]);
5914 emit_insn (gen_spu_convert (from, ops[1]));
5916 if (GET_CODE (ops[2]) == CONST_INT)
5918 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5919 if (GET_MODE_SIZE (imode) < 4)
5920 pos += 4 - GET_MODE_SIZE (imode);
5921 offset = GEN_INT (pos & 15);
5924 {
5925 offset = gen_reg_rtx (SImode);
5926 switch (mode)
5927 {
5928 case V16QImode:
5929 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5930 break;
5931 case V8HImode:
5932 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5933 emit_insn (gen_addsi3 (offset, offset, offset));
5934 break;
5935 case V4SFmode:
5936 case V4SImode:
5937 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5938 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5939 break;
5940 case V2DImode:
5941 case V2DFmode:
5942 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5943 break;
5944 default:
5945 abort ();
5946 }
5947 }
5948 emit_insn (gen_rotqby_ti (rot, from, offset));
5949 }
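5950 /* Write the trampoline code for calling nested function FNDECL with static chain CXT into M_TRAMP (the TARGET_TRAMPOLINE_INIT hook). */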
5951 static void
5952 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5953 {
5954 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5955 rtx shuf = gen_reg_rtx (V4SImode);
5956 rtx insn = gen_reg_rtx (V4SImode);
5957 rtx shufc;
5958 rtx insnc;
5959 rtx mem;
5961 fnaddr = force_reg (SImode, fnaddr);
5962 cxt = force_reg (SImode, cxt);
5964 if (TARGET_LARGE_MEM)
5966 rtx rotl = gen_reg_rtx (V4SImode);
5967 rtx mask = gen_reg_rtx (V4SImode);
5968 rtx bi = gen_reg_rtx (SImode);
5969 static unsigned char const shufa[16] = {
5970 2, 3, 0, 1, 18, 19, 16, 17,
5971 0, 1, 2, 3, 16, 17, 18, 19
5973 static unsigned char const insna[16] = {
5975 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5977 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5980 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5981 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5983 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5984 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5985 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5986 emit_insn (gen_selb (insn, insnc, rotl, mask));
5988 mem = adjust_address (m_tramp, V4SImode, 0);
5989 emit_move_insn (mem, insn);
5991 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5992 mem = adjust_address (m_tramp, Pmode, 16);
5993 emit_move_insn (mem, bi);
5994 }
5995 else
5996 {
5997 rtx scxt = gen_reg_rtx (SImode);
5998 rtx sfnaddr = gen_reg_rtx (SImode);
5999 static unsigned char const insna[16] = {
6000 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6006 shufc = gen_reg_rtx (TImode);
6007 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6009 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6010 fits 18 bits and the last 4 are zeros. This will be true if
6011 the stack pointer is initialized to 0x3fff0 at program start,
6012 otherwise the ila instruction will be garbage. */
6014 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6015 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6016 emit_insn (gen_cpat
6017 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6018 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6019 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6021 mem = adjust_address (m_tramp, V4SImode, 0);
6022 emit_move_insn (mem, insn);
6024 emit_insn (gen_sync ());
6025 }
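6026 /* Sign extend scalar ops[1] into the wider scalar ops[0] using a shufb pattern. */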
6027 void
6028 spu_expand_sign_extend (rtx ops[])
6029 {
6030 unsigned char arr[16];
6031 rtx pat = gen_reg_rtx (TImode);
6032 rtx sign, c;
6033 int i, last;
6034 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6035 if (GET_MODE (ops[1]) == QImode)
6036 {
6037 sign = gen_reg_rtx (HImode);
6038 emit_insn (gen_extendqihi2 (sign, ops[1]));
6039 for (i = 0; i < 16; i++)
6040 arr[i] = 0x12;
6041 arr[last] = 0x13;
6042 }
6043 else
6044 {
6045 for (i = 0; i < 16; i++)
6046 arr[i] = 0x10;
6047 switch (GET_MODE (ops[1]))
6048 {
6049 case HImode:
6050 sign = gen_reg_rtx (SImode);
6051 emit_insn (gen_extendhisi2 (sign, ops[1]));
6052 arr[last] = 0x03;
6053 arr[last - 1] = 0x02;
6054 break;
6055 case SImode:
6056 sign = gen_reg_rtx (SImode);
6057 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6058 for (i = 0; i < 4; i++)
6059 arr[last - i] = 3 - i;
6060 break;
6061 case DImode:
6062 sign = gen_reg_rtx (SImode);
6063 c = gen_reg_rtx (SImode);
6064 emit_insn (gen_spu_convert (c, ops[1]));
6065 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6066 for (i = 0; i < 8; i++)
6067 arr[last - i] = 7 - i;
6068 break;
6069 default:
6070 abort ();
6071 }
6072 }
6073 emit_move_insn (pat, array_to_constant (TImode, arr));
6074 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6075 }
6077 /* Expand vector initialization. If there are any constant parts,
6078 load the constant parts first, then load any non-constant parts. */
6079 static void
6080 spu_expand_vector_init (rtx target, rtx vals)
6081 {
6082 enum machine_mode mode = GET_MODE (target);
6083 int n_elts = GET_MODE_NUNITS (mode);
6084 int n_var = 0, i;
6085 bool all_same = true;
6086 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6089 first = XVECEXP (vals, 0, 0);
6090 for (i = 0; i < n_elts; ++i)
6091 {
6092 x = XVECEXP (vals, 0, i);
6093 if (!(CONST_INT_P (x)
6094 || GET_CODE (x) == CONST_DOUBLE
6095 || GET_CODE (x) == CONST_FIXED))
6096 ++n_var;
6097 else
6098 {
6099 if (first_constant == NULL_RTX)
6100 first_constant = x;
6101 }
6102 if (i > 0 && !rtx_equal_p (x, first))
6103 all_same = false;
6104 }
6106 /* If all elements are the same, use splats to repeat elements. */
6107 if (all_same)
6108 {
6109 if (!CONSTANT_P (first)
6110 && !register_operand (first, GET_MODE (x)))
6111 first = force_reg (GET_MODE (first), first);
6112 emit_insn (gen_spu_splats (target, first));
6113 return;
6114 }
6116 /* Load constant parts. */
6117 if (n_var != n_elts)
6118 {
6119 if (n_var == 0)
6120 {
6121 emit_move_insn (target,
6122 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6123 }
6124 else
6125 {
6126 rtx constant_parts_rtx = copy_rtx (vals);
6128 gcc_assert (first_constant != NULL_RTX);
6129 /* Fill empty slots with the first constant; this increases
6130 our chance of using splats in the recursive call below. */
6131 for (i = 0; i < n_elts; ++i)
6132 {
6133 x = XVECEXP (constant_parts_rtx, 0, i);
6134 if (!(CONST_INT_P (x)
6135 || GET_CODE (x) == CONST_DOUBLE
6136 || GET_CODE (x) == CONST_FIXED))
6137 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6138 }
6140 spu_expand_vector_init (target, constant_parts_rtx);
6141 }
6142 }
6144 /* Load variable parts. */
6145 if (n_var != 0)
6146 {
6147 rtx insert_operands[4];
6149 insert_operands[0] = target;
6150 insert_operands[2] = target;
6151 for (i = 0; i < n_elts; ++i)
6152 {
6153 x = XVECEXP (vals, 0, i);
6154 if (!(CONST_INT_P (x)
6155 || GET_CODE (x) == CONST_DOUBLE
6156 || GET_CODE (x) == CONST_FIXED))
6157 {
6158 if (!register_operand (x, GET_MODE (x)))
6159 x = force_reg (GET_MODE (x), x);
6160 insert_operands[1] = x;
6161 insert_operands[3] = GEN_INT (i);
6162 spu_builtin_insert (insert_operands);
6163 }
6164 }
6165 }
6166 }
6168 /* Return the insn index of the vector compare instruction for the given
6169 CODE, DEST_MODE and OP_MODE; return -1 if no valid insn is available. */
6171 static int
6172 get_vec_cmp_insn (enum rtx_code code,
6173 enum machine_mode dest_mode,
6174 enum machine_mode op_mode)
6175 {
6176 switch (code)
6177 {
6178 case EQ:
6180 if (dest_mode == V16QImode && op_mode == V16QImode)
6181 return CODE_FOR_ceq_v16qi;
6182 if (dest_mode == V8HImode && op_mode == V8HImode)
6183 return CODE_FOR_ceq_v8hi;
6184 if (dest_mode == V4SImode && op_mode == V4SImode)
6185 return CODE_FOR_ceq_v4si;
6186 if (dest_mode == V4SImode && op_mode == V4SFmode)
6187 return CODE_FOR_ceq_v4sf;
6188 if (dest_mode == V2DImode && op_mode == V2DFmode)
6189 return CODE_FOR_ceq_v2df;
6190 break;
6191 case GT:
6192 if (dest_mode == V16QImode && op_mode == V16QImode)
6193 return CODE_FOR_cgt_v16qi;
6194 if (dest_mode == V8HImode && op_mode == V8HImode)
6195 return CODE_FOR_cgt_v8hi;
6196 if (dest_mode == V4SImode && op_mode == V4SImode)
6197 return CODE_FOR_cgt_v4si;
6198 if (dest_mode == V4SImode && op_mode == V4SFmode)
6199 return CODE_FOR_cgt_v4sf;
6200 if (dest_mode == V2DImode && op_mode == V2DFmode)
6201 return CODE_FOR_cgt_v2df;
6202 break;
6203 case GTU:
6204 if (dest_mode == V16QImode && op_mode == V16QImode)
6205 return CODE_FOR_clgt_v16qi;
6206 if (dest_mode == V8HImode && op_mode == V8HImode)
6207 return CODE_FOR_clgt_v8hi;
6208 if (dest_mode == V4SImode && op_mode == V4SImode)
6209 return CODE_FOR_clgt_v4si;
6210 break;
6211 default:
6212 break;
6213 }
6215 return -1;
6216 }
6217 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6218 DMODE is expected destination mode. This is a recursive function. */
6220 static rtx
6221 spu_emit_vector_compare (enum rtx_code rcode,
6222 rtx op0, rtx op1,
6223 enum machine_mode dmode)
6224 {
6225 int vec_cmp_insn;
6226 rtx mask;
6227 enum machine_mode dest_mode;
6228 enum machine_mode op_mode = GET_MODE (op1);
6230 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6232 /* Floating point vector compare instructions use destination V4SImode.
6233 Double floating point vector compare instructions use destination
6234 V2DImode. Move the destination to the appropriate mode later. */
6235 if (dmode == V4SFmode)
6236 dest_mode = V4SImode;
6237 else if (dmode == V2DFmode)
6238 dest_mode = V2DImode;
6239 else
6240 dest_mode = dmode;
6242 mask = gen_reg_rtx (dest_mode);
6243 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6245 if (vec_cmp_insn == -1)
6247 bool swap_operands = false;
6248 bool try_again = false;
6249 switch (rcode)
6250 {
6251 case LT:
6252 rcode = GT;
6253 swap_operands = true;
6254 try_again = true;
6255 break;
6256 case LTU:
6257 rcode = GTU;
6258 swap_operands = true;
6259 try_again = true;
6260 break;
6261 case NE:
6262 /* Treat A != B as ~(A==B). */
6263 {
6264 enum insn_code nor_code;
6265 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6266 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
6267 gcc_assert (nor_code != CODE_FOR_nothing);
6268 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6269 if (dmode != dest_mode)
6270 {
6271 rtx temp = gen_reg_rtx (dest_mode);
6272 convert_move (temp, mask, 0);
6273 return temp;
6274 }
6275 return mask;
6276 }
6277 break;
6278 case GE:
6279 case GEU:
6280 case LE:
6281 case LEU:
6282 /* Try GT/GTU/LT/LTU OR EQ. */
6283 {
6284 rtx c_rtx, eq_rtx;
6285 enum insn_code ior_code;
6286 enum rtx_code new_code;
6288 switch (rcode)
6289 {
6290 case GE: new_code = GT; break;
6291 case GEU: new_code = GTU; break;
6292 case LE: new_code = LT; break;
6293 case LEU: new_code = LTU; break;
6294 default:
6295 gcc_unreachable ();
6296 }
6298 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6299 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6301 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
6302 gcc_assert (ior_code != CODE_FOR_nothing);
6303 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6304 if (dmode != dest_mode)
6305 {
6306 rtx temp = gen_reg_rtx (dest_mode);
6307 convert_move (temp, mask, 0);
6308 return temp;
6309 }
6310 return mask;
6311 }
6312 break;
6313 default:
6314 gcc_unreachable ();
6315 }
6317 /* You only get two chances. */
6318 if (try_again)
6319 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6321 gcc_assert (vec_cmp_insn != -1);
6323 if (swap_operands)
6324 {
6325 rtx tmp;
6326 tmp = op0;
6327 op0 = op1;
6328 op1 = tmp;
6329 }
6330 }
6332 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6333 if (dmode != dest_mode)
6334 {
6335 rtx temp = gen_reg_rtx (dest_mode);
6336 convert_move (temp, mask, 0);
6337 return temp;
6338 }
6340 return mask;
6341 }
6343 /* Emit vector conditional expression.
6344 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6345 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6347 rtx
6348 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6349 rtx cond, rtx cc_op0, rtx cc_op1)
6350 {
6351 enum machine_mode dest_mode = GET_MODE (dest);
6352 enum rtx_code rcode = GET_CODE (cond);
6353 rtx mask;
6355 /* Get the vector mask for the given relational operations. */
6356 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6358 emit_insn (gen_selb (dest, op2, op1, mask));
6360 return dest;
6361 }
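6362 /* Like force_reg, but coerce OP to MODE first, splatting a scalar into a vector or converting between scalar sizes as needed. */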
6363 static rtx
6364 spu_force_reg (enum machine_mode mode, rtx op)
6365 {
6366 rtx x, r;
6367 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6368 {
6369 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6370 || GET_MODE (op) == BLKmode)
6371 return force_reg (mode, convert_to_mode (mode, op, 0));
6372 abort ();
6373 }
6375 r = force_reg (GET_MODE (op), op);
6376 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6377 {
6378 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6379 if (x)
6380 return x;
6381 }
6383 x = gen_reg_rtx (mode);
6384 emit_insn (gen_spu_convert (x, r));
6385 return x;
6386 }
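6387 /* Diagnose out-of-range immediates and ignored low bits for builtin operand OP of parameter class P. */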
6388 static void
6389 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6390 {
6391 HOST_WIDE_INT v = 0;
6392 int lsbits;
6393 /* Check the range of immediate operands. */
6394 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6396 int range = p - SPU_BTI_7;
6398 if (!CONSTANT_P (op))
6399 error ("%s expects an integer literal in the range [%d, %d].",
6401 spu_builtin_range[range].low, spu_builtin_range[range].high);
6403 if (GET_CODE (op) == CONST
6404 && (GET_CODE (XEXP (op, 0)) == PLUS
6405 || GET_CODE (XEXP (op, 0)) == MINUS))
6407 v = INTVAL (XEXP (XEXP (op, 0), 1));
6408 op = XEXP (XEXP (op, 0), 0);
6410 else if (GET_CODE (op) == CONST_INT)
6411 v = INTVAL (op);
6412 else if (GET_CODE (op) == CONST_VECTOR
6413 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6414 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6416 /* The default for v is 0 which is valid in every range. */
6417 if (v < spu_builtin_range[range].low
6418 || v > spu_builtin_range[range].high)
6419 error ("%s expects an integer literal in the range [%d, %d]. ("
6420 HOST_WIDE_INT_PRINT_DEC ")",
6422 spu_builtin_range[range].low, spu_builtin_range[range].high,
6425 switch (p)
6426 {
6427 case SPU_BTI_S10_4:
6428 lsbits = 4;
6429 break;
6430 case SPU_BTI_U16_2:
6431 /* This is only used in lqa and stqa. Even though the insns
6432 encode 16 bits of the address (all but the 2 least
6433 significant), only 14 bits are used because it is masked to
6434 be 16 byte aligned. */
6435 lsbits = 4;
6436 break;
6437 case SPU_BTI_S16_2:
6438 /* This is used for lqr and stqr. */
6439 lsbits = 2;
6440 break;
6441 default:
6442 lsbits = 0;
6443 }
6445 if (GET_CODE (op) == LABEL_REF
6446 || (GET_CODE (op) == SYMBOL_REF
6447 && SYMBOL_REF_FUNCTION_P (op))
6448 || (v & ((1 << lsbits) - 1)) != 0)
6449 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6450 d->name);
6451 }
6452 }
6455 static int
6456 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6457 rtx target, rtx ops[])
6458 {
6459 enum insn_code icode = (enum insn_code) d->icode;
6460 int i = 0, a;
6462 /* Expand the arguments into rtl. */
6464 if (d->parm[0] != SPU_BTI_VOID)
6465 ops[i++] = target;
6467 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6468 {
6469 tree arg = CALL_EXPR_ARG (exp, a);
6470 if (arg == 0)
6471 abort ();
6472 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6473 }
6475 /* The insn pattern may have additional operands (SCRATCH).
6476 Return the number of actual non-SCRATCH operands. */
6477 gcc_assert (i <= insn_data[icode].n_operands);
6478 return i;
6479 }
6481 static rtx
6482 spu_expand_builtin_1 (struct spu_builtin_description *d,
6483 tree exp, rtx target)
6484 {
6485 rtx pat;
6486 rtx ops[8];
6487 enum insn_code icode = (enum insn_code) d->icode;
6488 enum machine_mode mode, tmode;
6489 int i, p;
6490 int n_operands;
6491 tree return_type;
6493 /* Set up ops[] with values from arglist. */
6494 n_operands = expand_builtin_args (d, exp, target, ops);
6496 /* Handle the target operand which must be operand 0. */
6497 i = 0;
6498 if (d->parm[0] != SPU_BTI_VOID)
6499 {
6501 /* We prefer the mode specified for the match_operand otherwise
6502 use the mode from the builtin function prototype. */
6503 tmode = insn_data[d->icode].operand[0].mode;
6504 if (tmode == VOIDmode)
6505 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6507 /* Try to use target, because not using it can lead to extra copies,
6508 and when we are using all of the registers extra copies lead
6509 to extra spills. */
6510 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6511 ops[0] = target;
6512 else
6513 target = ops[0] = gen_reg_rtx (tmode);
6515 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6516 abort ();
6518 i++;
6519 }
6521 if (d->fcode == SPU_MASK_FOR_LOAD)
6522 {
6523 enum machine_mode mode = insn_data[icode].operand[1].mode;
6524 tree arg;
6525 rtx addr, op, pat;
6527 /* Get the address. */
6528 arg = CALL_EXPR_ARG (exp, 0);
6529 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6530 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6531 addr = memory_address (mode, op);
6533 /* Negate the address. */
6534 op = gen_reg_rtx (GET_MODE (addr));
6535 emit_insn (gen_rtx_SET (VOIDmode, op,
6536 gen_rtx_NEG (GET_MODE (addr), addr)));
6537 op = gen_rtx_MEM (mode, op);
6539 pat = GEN_FCN (icode) (target, op);
6540 if (!pat)
6541 return 0;
6542 emit_insn (pat);
6543 return target;
6544 }
6546 /* Ignore align_hint, but still expand its args in case they have
6547 side effects. */
6548 if (icode == CODE_FOR_spu_align_hint)
6549 return 0;
6551 /* Handle the rest of the operands. */
6552 for (p = 1; i < n_operands; i++, p++)
6553 {
6554 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6555 mode = insn_data[d->icode].operand[i].mode;
6556 else
6557 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6559 /* mode can be VOIDmode here for labels */
6561 /* For specific intrinsics with an immediate operand, e.g.,
6562 si_ai(), we sometimes need to convert the scalar argument to a
6563 vector argument by splatting the scalar. */
6564 if (VECTOR_MODE_P (mode)
6565 && (GET_CODE (ops[i]) == CONST_INT
6566 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6567 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6568 {
6569 if (GET_CODE (ops[i]) == CONST_INT)
6570 ops[i] = spu_const (mode, INTVAL (ops[i]));
6571 else
6572 {
6573 rtx reg = gen_reg_rtx (mode);
6574 enum machine_mode imode = GET_MODE_INNER (mode);
6575 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6576 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6577 if (imode != GET_MODE (ops[i]))
6578 ops[i] = convert_to_mode (imode, ops[i],
6579 TYPE_UNSIGNED (spu_builtin_types
6580 [d->parm[i]]));
6581 emit_insn (gen_spu_splats (reg, ops[i]));
6582 ops[i] = reg;
6583 }
6584 }
6586 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6588 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6589 ops[i] = spu_force_reg (mode, ops[i]);
6590 }
6592 switch (insn_data[icode].n_operands)
6593 {
6594 case 0:
6595 pat = GEN_FCN (icode) (0);
6596 break;
6597 case 1:
6598 pat = GEN_FCN (icode) (ops[0]);
6599 break;
6600 case 2:
6601 pat = GEN_FCN (icode) (ops[0], ops[1]);
6602 break;
6603 case 3:
6604 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6605 break;
6606 case 4:
6607 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6608 break;
6609 case 5:
6610 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6611 break;
6612 case 6:
6613 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6614 break;
6615 default:
6616 abort ();
6617 }
6619 if (!pat)
6620 abort ();
6622 if (d->type == B_CALL || d->type == B_BISLED)
6623 emit_call_insn (pat);
6624 else if (d->type == B_JUMP)
6625 {
6626 emit_jump_insn (pat);
6627 emit_barrier ();
6628 }
6629 else
6630 emit_insn (pat);
6632 return_type = spu_builtin_types[d->parm[0]];
6633 if (d->parm[0] != SPU_BTI_VOID
6634 && GET_MODE (target) != TYPE_MODE (return_type))
6635 {
6636 /* target is the return value. It should always be the mode of
6637 the builtin function prototype. */
6638 target = spu_force_reg (TYPE_MODE (return_type), target);
6639 }
6641 return target;
6642 }
6644 static rtx
6645 spu_expand_builtin (tree exp,
6646 rtx target,
6647 rtx subtarget ATTRIBUTE_UNUSED,
6648 enum machine_mode mode ATTRIBUTE_UNUSED,
6649 int ignore ATTRIBUTE_UNUSED)
6651 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6652 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6653 struct spu_builtin_description *d;
6655 if (fcode < NUM_SPU_BUILTINS)
6656 {
6657 d = &spu_builtins[fcode];
6659 return spu_expand_builtin_1 (d, exp, target);
6660 }
6661 abort ();
6662 }
6664 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6665 static tree
6666 spu_builtin_mul_widen_even (tree type)
6667 {
6668 switch (TYPE_MODE (type))
6669 {
6670 case V8HImode:
6671 if (TYPE_UNSIGNED (type))
6672 return spu_builtins[SPU_MULE_0].fndecl;
6673 else
6674 return spu_builtins[SPU_MULE_1].fndecl;
6675 break;
6676 default:
6677 return NULL_TREE;
6678 }
6679 }
6681 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6682 static tree
6683 spu_builtin_mul_widen_odd (tree type)
6684 {
6685 switch (TYPE_MODE (type))
6686 {
6687 case V8HImode:
6688 if (TYPE_UNSIGNED (type))
6689 return spu_builtins[SPU_MULO_1].fndecl;
6690 else
6691 return spu_builtins[SPU_MULO_0].fndecl;
6692 break;
6693 default:
6694 return NULL_TREE;
6695 }
6696 }
6698 /* Implement targetm.vectorize.builtin_mask_for_load. */
6699 static tree
6700 spu_builtin_mask_for_load (void)
6701 {
6702 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6703 gcc_assert (d);
6704 return d->fndecl;
6705 }
6707 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6708 static int
6709 spu_builtin_vectorization_cost (bool runtime_test)
6710 {
6711 /* If the branch of the runtime test is taken - i.e. - the vectorized
6712 version is skipped - this incurs a misprediction cost (because the
6713 vectorized version is expected to be the fall-through). So we subtract
6714 the latency of a mispredicted branch from the costs that are incurred
6715 when the vectorized version is executed. */
6716 if (runtime_test)
6717 return -19;
6718 else
6719 return 0;
6720 }
6722 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6723 after applying N iterations. This routine does not determine how many
6724 iterations are required to reach the desired alignment. */
6726 static bool
6727 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6728 {
6729 if (is_packed)
6730 return false;
6732 /* All other types are naturally aligned. */
6733 return true;
6734 }
6736 /* Implement targetm.vectorize.builtin_vec_perm. */
6737 static tree
6738 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6739 {
6740 struct spu_builtin_description *d;
6742 *mask_element_type = unsigned_char_type_node;
6744 switch (TYPE_MODE (type))
6745 {
6746 case V16QImode:
6747 if (TYPE_UNSIGNED (type))
6748 d = &spu_builtins[SPU_SHUFFLE_0];
6749 else
6750 d = &spu_builtins[SPU_SHUFFLE_1];
6751 break;
6753 case V8HImode:
6754 if (TYPE_UNSIGNED (type))
6755 d = &spu_builtins[SPU_SHUFFLE_2];
6756 else
6757 d = &spu_builtins[SPU_SHUFFLE_3];
6758 break;
6760 case V4SImode:
6761 if (TYPE_UNSIGNED (type))
6762 d = &spu_builtins[SPU_SHUFFLE_4];
6763 else
6764 d = &spu_builtins[SPU_SHUFFLE_5];
6765 break;
6767 case V2DImode:
6768 if (TYPE_UNSIGNED (type))
6769 d = &spu_builtins[SPU_SHUFFLE_6];
6770 else
6771 d = &spu_builtins[SPU_SHUFFLE_7];
6772 break;
6774 case V4SFmode:
6775 d = &spu_builtins[SPU_SHUFFLE_8];
6776 break;
6778 case V2DFmode:
6779 d = &spu_builtins[SPU_SHUFFLE_9];
6780 break;
6782 default:
6783 return NULL_TREE;
6784 }
6786 gcc_assert (d);
6787 return d->fndecl;
6788 }
6790 /* Return the appropriate mode for a named address pointer. */
6791 static enum machine_mode
6792 spu_addr_space_pointer_mode (addr_space_t addrspace)
6793 {
6794 switch (addrspace)
6795 {
6796 case ADDR_SPACE_GENERIC:
6797 return ptr_mode;
6798 case ADDR_SPACE_EA:
6799 return EAmode;
6800 default:
6801 gcc_unreachable ();
6802 }
6803 }
6805 /* Return the appropriate mode for a named address address. */
6806 static enum machine_mode
6807 spu_addr_space_address_mode (addr_space_t addrspace)
6808 {
6809 switch (addrspace)
6810 {
6811 case ADDR_SPACE_GENERIC:
6812 return Pmode;
6813 case ADDR_SPACE_EA:
6814 return EAmode;
6815 default:
6816 gcc_unreachable ();
6817 }
6818 }
6820 /* Determine if one named address space is a subset of another. */
6822 static bool
6823 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6824 {
6825 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6826 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6828 if (subset == superset)
6829 return true;
6831 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6832 being subsets but instead as disjoint address spaces. */
6833 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6834 return false;
6836 else
6837 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6838 }
6840 /* Convert from one address space to another. */
6841 static rtx
6842 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6843 {
6844 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6845 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6847 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6848 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6850 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6851 {
6852 rtx result, ls;
6854 ls = gen_const_mem (DImode,
6855 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6856 set_mem_align (ls, 128);
6858 result = gen_reg_rtx (Pmode);
6859 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6860 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6861 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6862 ls, const0_rtx, Pmode, 1);
6864 emit_insn (gen_subsi3 (result, op, ls));
6866 return result;
6867 }
6869 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6870 {
6871 rtx result, ls;
6873 ls = gen_const_mem (DImode,
6874 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6875 set_mem_align (ls, 128);
6877 result = gen_reg_rtx (EAmode);
6878 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6879 op = force_reg (Pmode, op);
6880 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6881 ls, const0_rtx, EAmode, 1);
6882 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6884 if (EAmode == SImode)
6885 emit_insn (gen_addsi3 (result, op, ls));
6886 else
6887 emit_insn (gen_adddi3 (result, op, ls));
6889 return result;
6890 }
6892 else
6893 return op;
6894 }
6897 /* Count the total number of instructions in each pipe and return the
6898 maximum, which is used as the Minimum Iteration Interval (MII)
6899 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6900 -2 are instructions that can go in pipe0 or pipe1. */
6901 static int
6902 spu_sms_res_mii (struct ddg *g)
6903 {
6904 int i;
6905 unsigned t[4] = {0, 0, 0, 0};
6907 for (i = 0; i < g->num_nodes; i++)
6908 {
6909 rtx insn = g->nodes[i].insn;
6910 int p = get_pipe (insn) + 2;
6912 gcc_assert (p >= 0);
6913 gcc_assert (p < 4);
6915 t[p]++;
6916 if (dump_file && INSN_P (insn))
6917 fprintf (dump_file, "i%d %s %d %d\n",
6918 INSN_UID (insn),
6919 insn_data[INSN_CODE(insn)].name,
6920 p, t[p]);
6921 }
6922 if (dump_file)
6923 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6925 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6926 }
6929 static void
6930 spu_init_expanders (void)
6931 {
6932 if (cfun)
6933 {
6934 rtx r0, r1;
6935 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6936 frame_pointer_needed is true. We don't know that until we're
6937 expanding the prologue. */
6938 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6940 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6941 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6942 to be treated as aligned, so generate them here. */
6943 r0 = gen_reg_rtx (SImode);
6944 r1 = gen_reg_rtx (SImode);
6945 mark_reg_pointer (r0, 128);
6946 mark_reg_pointer (r1, 128);
6947 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6948 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6949 }
6950 }
6952 static enum machine_mode
6953 spu_libgcc_cmp_return_mode (void)
6954 {
6956 /* For the SPU, word_mode is TImode, so it is better to use SImode
6957 for compare returns. */
6958 return SImode;
6959 }
6961 static enum machine_mode
6962 spu_libgcc_shift_count_mode (void)
6963 {
6964 /* For the SPU, word_mode is TImode, so it is better to use SImode
6965 for shift counts. */
6966 return SImode;
6967 }
6969 /* An early place to adjust some flags after GCC has finished processing
6970 command line options. */
6971 static void
6972 asm_file_start (void)
6973 {
6974 /* Variable tracking should be run after all optimizations which
6975 change order of insns. It also needs a valid CFG. */
6976 spu_flag_var_tracking = flag_var_tracking;
6977 flag_var_tracking = 0;
6979 default_file_start ();
6980 }
6982 /* Implement targetm.section_type_flags. */
6983 static unsigned int
6984 spu_section_type_flags (tree decl, const char *name, int reloc)
6985 {
6986 /* .toe needs to have type @nobits. */
6987 if (strcmp (name, ".toe") == 0)
6988 return SECTION_BSS;
6989 /* Don't load _ea into the current address space. */
6990 if (strcmp (name, "._ea") == 0)
6991 return SECTION_WRITE | SECTION_DEBUG;
6992 return default_section_type_flags (decl, name, reloc);
6995 /* Implement targetm.select_section. */
6996 static section *
6997 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6998 {
6999 /* Variables and constants defined in the __ea address space
7000 go into a special section named "._ea". */
7001 if (TREE_TYPE (decl) != error_mark_node
7002 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7003 {
7004 /* We might get called with string constants, but get_named_section
7005 doesn't like them as they are not DECLs. Also, we need to set
7006 flags in that case. */
7007 if (!DECL_P (decl))
7008 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7010 return get_named_section (decl, "._ea", reloc);
7011 }
7013 return default_elf_select_section (decl, reloc, align);
7014 }
7016 /* Implement targetm.unique_section. */
7017 static void
7018 spu_unique_section (tree decl, int reloc)
7019 {
7020 /* We don't support unique section names in the __ea address
7021 space for now. */
7022 if (TREE_TYPE (decl) != error_mark_node
7023 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7024 return;
7026 default_unique_section (decl, reloc);
7027 }
7029 /* Generate a constant or register which contains 2^SCALE. We assume
7030 the result is valid for MODE. Currently, MODE must be V4SFmode and
7031 SCALE must be SImode. */
7032 rtx
7033 spu_gen_exp2 (enum machine_mode mode, rtx scale)
7034 {
7035 gcc_assert (mode == V4SFmode);
7036 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7037 if (GET_CODE (scale) != CONST_INT)
7038 {
7039 /* unsigned int exp = (127 + scale) << 23;
7040 __vector float m = (__vector float) spu_splats (exp); */
7041 rtx reg = force_reg (SImode, scale);
7042 rtx exp = gen_reg_rtx (SImode);
7043 rtx mul = gen_reg_rtx (mode);
7044 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7045 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7046 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7047 return mul;
7048 }
7049 else
7050 {
7051 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7052 unsigned char arr[16];
7053 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7054 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7055 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7056 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7057 return array_to_constant (mode, arr);
7058 }
7059 }
7061 /* After reload, just change the convert into a move instruction
7062 or a dead instruction. */
7063 void
7064 spu_split_convert (rtx ops[])
7065 {
7066 if (REGNO (ops[0]) == REGNO (ops[1]))
7067 emit_note (NOTE_INSN_DELETED);
7068 else
7069 {
7070 /* Use TImode always as this might help hard reg copyprop. */
7071 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7072 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7073 emit_insn (gen_move_insn (op0, op1));
7074 }
7075 }
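7076 /* Output the profiling code that calls _mcount at function entry. */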
7077 void
7078 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7079 {
7080 fprintf (file, "# profile\n");
7081 fprintf (file, "brsl $75, _mcount\n");
7082 }