1 /* Assembly functions for the Xtensa version of libgcc1.
2 Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later version.
12 In addition to the permissions in the GNU General Public License, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of this file into combinations with other programs,
15 and to distribute those combinations without any restriction coming
16 from the use of this file. (The General Public License restrictions
17 do apply in other respects; for example, they cover modification of
18 the file, and distribution when not linked into a combined executable.)
21 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
22 WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GCC; see the file COPYING. If not, write to the Free
28 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
31 #include "xtensa-config.h"
33 # Note: These functions use a minimum stack frame size of 32. This is
34 # necessary for Xtensa configurations that only support a fixed register
35 # window size of 8, where even leaf functions (such as these) need to
36 # allocate space for a 4-word "extra save area".
38 # Define macros for the ABS and ADDX* instructions to handle cases
39 # where they are not included in the Xtensa processor configuration.
# do_abs dst, src, tmp — dst = abs(src), tmp is scratch.
# Fallback used when the processor configuration lacks the ABS
# instruction (per the comment block above).
# NOTE(review): intermediate lines of this macro (including .endm and
# the negation path) are elided in this view; only the non-negative
# select is visible. Confirm against the full source before editing.
41 .macro do_abs dst, src, tmp
# movgez: \tmp keeps \src when \src >= 0 (conditional move on sign).
46 movgez \tmp, \src, \src
# do_addx2/4/8 dst, as, at, tmp — dst = (as << 1|2|3) + at, with tmp
# as scratch for the shift when the ADDX* instructions are not in the
# processor configuration (per the comment block above).
# NOTE(review): the macro bodies and .endm lines are elided in this
# view; only the .macro header lines are visible.
51 .macro do_addx2 dst, as, at, tmp
60 .macro do_addx4 dst, as, at, tmp
69 .macro do_addx8 dst, as, at, tmp
78 # Define macros for function entry and return, supporting either the
79 # standard register windowed ABI or the non-windowed call0 ABI. These
80 # macros do not allocate any extra stack space, so they only work for
81 # leaf functions that do not need to spill anything to the stack.
# abi_entry reg, size — emits the ABI-appropriate prologue (windowed
# ENTRY vs. call0 stack adjust).
# NOTE(review): the bodies, .endm lines, and the abi_return macro
# header are elided in this view; the second #if below presumably
# guards abi_return — confirm against the full source.
83 .macro abi_entry reg, size
84 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
92 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
# __mulsi3 — 32x32 -> 32-bit signed multiply (a2 * a3 -> a2).
# Three implementations selected by preprocessor: MUL16 (elided here),
# MAC16, or the generic shift-add fallback below.
# NOTE(review): many interior lines (entry/return, branch targets such
# as .Lmult_main_loop, and the MUL16/MAC16 bodies) are elided in this
# view; do not edit from this fragment alone.
103 .type __mulsi3,@function
123 #elif XCHAL_HAVE_MAC16
132 #else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
134 # Multiply one bit at a time, but unroll the loop 4x to better
135 # exploit the addx instructions and avoid overhead.
136 # Peel the first iteration to save a cycle on init.
138 # Avoid negative numbers.
139 xor a5, a2, a3 # top bit is 1 iff one of the inputs is negative
143 # Swap so the second argument is smaller.
# a7 presumably holds a2 - a3 here (computed on an elided line);
# these conditional moves select by its sign — TODO confirm.
146 movgez a4, a2, a7 # a4 = max(a2, a3)
147 movltz a3, a2, a7 # a3 = min(a2, a3)
# Unrolled shift-add: accumulate a4 scaled by each low bit of a3
# (tests of a3's bits are on elided lines between these).
153 do_addx2 a7, a4, a2, a7
157 do_addx4 a7, a4, a2, a7
161 do_addx8 a7, a4, a2, a7
165 bgeui a3, 16, .Lmult_main_loop
# Second copy of the unrolled 4-bit step (main loop body).
179 do_addx2 a7, a4, a2, a7
183 do_addx4 a7, a4, a2, a7
187 do_addx8 a7, a4, a2, a7
191 bgeui a3, 16, .Lmult_main_loop
196 #endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
199 .size __mulsi3,.-__mulsi3
201 #endif /* L_mulsi3 */
204 # Define a macro for the NSAU (unsigned normalize shift amount)
205 # instruction, which computes the number of leading zero bits,
206 # to handle cases where it is not included in the Xtensa processor
# do_nsau cnt, val, tmp, a — cnt = count of leading zeros in val.
# Fallback narrows val to the highest nonzero byte, then finishes
# with a lookup in __nsau_data.
# NOTE(review): the branches and adds between these lines are elided
# in this view; only the extui/table-load skeleton is visible.
209 .macro do_nsau cnt, val, tmp, a
# Probe the upper half-word, then the top byte, of \a.
215 extui \tmp, \a, 16, 16
220 extui \tmp, \a, 24, 8
# Index the 256-entry leading-zero table with the selected byte.
225 movi \tmp, __nsau_data
230 #endif /* !XCHAL_HAVE_NSA */
# __nsau_data: 256-byte lookup table; entry[v] = number of leading
# zero bits in the 8-bit value v (entry[0] = 8). Used by do_nsau when
# the NSA/NSAU instructions are not configured.
# Layout: 8, 7, 6 x2, 5 x4, 4 x8, 3 x16, 2 x32, 1 x64, 0 x128.
# NOTE(review): the label line (__nsau_data:) and alignment directive
# are elided in this view.
237 .type __nsau_data,@object
240 .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
241 .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
242 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
243 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
244 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
245 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
246 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
247 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
248 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
249 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
250 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
251 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
252 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
253 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
254 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
255 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
256 #endif /* !XCHAL_HAVE_NSA */
257 .size __nsau_data,.-__nsau_data
# __udivsi3 — unsigned 32-bit divide: a2 = a2 / a3.
# Algorithm: align divisor to dividend via leading-zero counts, then a
# test-subtract-and-shift loop producing one quotient bit per step.
# NOTE(review): entry/return instructions, labels (.Lspecial,
# .Lzerobit, .Lloopend, .Lreturn*, .Lle_one, .Lerror), and parts of
# the loop body are elided in this view; do not edit from this
# fragment alone.
265 .type __udivsi3,@function
268 bltui a3, 2, .Lle_one # check if the divisor <= 1
270 mov a6, a2 # keep dividend in a6
271 do_nsau a5, a6, a2, a7 # dividend_shift = nsau(dividend)
272 do_nsau a4, a3, a2, a7 # divisor_shift = nsau(divisor)
# If the dividend has at least as many leading zeros as the divisor,
# the quotient is 0 or 1 — handled out of line.
273 bgeu a5, a4, .Lspecial
275 sub a4, a4, a5 # count = divisor_shift - dividend_shift
# sll uses SAR, presumably loaded with `count` on an elided line.
277 sll a3, a3 # divisor <<= count
278 movi a2, 0 # quotient = 0
280 # test-subtract-and-shift loop; one quotient bit on each iteration
# Zero-overhead loop when the LOOPS option is configured.
282 loopnez a4, .Lloopend
283 #endif /* XCHAL_HAVE_LOOPS */
285 bltu a6, a3, .Lzerobit
291 #if !XCHAL_HAVE_LOOPS
294 #endif /* !XCHAL_HAVE_LOOPS */
# Final correction: one more quotient bit after the loop.
297 bltu a6, a3, .Lreturn
298 addi a2, a2, 1 # increment quotient if dividend >= divisor
303 # return dividend >= divisor
305 bltu a6, a3, .Lreturn2
# Divisor <= 1 path: divisor == 1 returns the dividend; divisor == 0
# falls through to the error stub.
311 beqz a3, .Lerror # if divisor == 1, return the dividend
314 movi a2, 0 # just return 0; could throw an exception
316 .size __udivsi3,.-__udivsi3
318 #endif /* L_udivsi3 */
# __divsi3 — signed 32-bit divide: a2 = a2 / a3.
# Strategy: record the result sign (xor of inputs), divide the
# absolute values with the same shift-subtract loop as __udivsi3,
# then negate the quotient if the signs differed.
# NOTE(review): entry/return instructions, labels, and parts of the
# loop body are elided in this view; do not edit from this fragment
# alone.
324 .type __divsi3,@function
327 xor a7, a2, a3 # sign = dividend ^ divisor
328 do_abs a6, a2, a4 # udividend = abs(dividend)
329 do_abs a3, a3, a4 # udivisor = abs(divisor)
330 bltui a3, 2, .Lle_one # check if udivisor <= 1
331 do_nsau a5, a6, a2, a8 # udividend_shift = nsau(udividend)
332 do_nsau a4, a3, a2, a8 # udivisor_shift = nsau(udivisor)
333 bgeu a5, a4, .Lspecial
335 sub a4, a4, a5 # count = udivisor_shift - udividend_shift
# sll uses SAR, presumably loaded with `count` on an elided line.
337 sll a3, a3 # udivisor <<= count
338 movi a2, 0 # quotient = 0
340 # test-subtract-and-shift loop; one quotient bit on each iteration
342 loopnez a4, .Lloopend
343 #endif /* XCHAL_HAVE_LOOPS */
345 bltu a6, a3, .Lzerobit
351 #if !XCHAL_HAVE_LOOPS
354 #endif /* !XCHAL_HAVE_LOOPS */
# Final correction bit, then apply the saved sign.
357 bltu a6, a3, .Lreturn
358 addi a2, a2, 1 # increment quotient if udividend >= udivisor
# a5 presumably holds -quotient (computed on an elided line) — TODO
# confirm against the full source.
361 movltz a2, a5, a7 # return (sign < 0) ? -quotient : quotient
366 bltu a6, a3, .Lreturn2 # if dividend < divisor, return 0
369 movltz a2, a4, a7 # else return (sign < 0) ? -1 : 1
# udivisor <= 1 path.
375 neg a2, a6 # if udivisor == 1, then return...
376 movgez a2, a6, a7 # (sign < 0) ? -udividend : udividend
379 movi a2, 0 # just return 0; could throw an exception
381 .size __divsi3,.-__divsi3
383 #endif /* L_divsi3 */
# __umodsi3 — unsigned 32-bit remainder: a2 = a2 % a3.
# Same align-then-subtract structure as __udivsi3, but keeps the
# running dividend (a2) instead of accumulating a quotient.
# NOTE(review): entry/return instructions, labels, and parts of the
# loop body are elided in this view; do not edit from this fragment
# alone.
389 .type __umodsi3,@function
392 bltui a3, 2, .Lle_one # check if the divisor is <= 1
394 do_nsau a5, a2, a6, a7 # dividend_shift = nsau(dividend)
395 do_nsau a4, a3, a6, a7 # divisor_shift = nsau(divisor)
396 bgeu a5, a4, .Lspecial
398 sub a4, a4, a5 # count = divisor_shift - dividend_shift
# sll uses SAR, presumably loaded with `count` on an elided line.
400 sll a3, a3 # divisor <<= count
402 # test-subtract-and-shift loop
404 loopnez a4, .Lloopend
405 #endif /* XCHAL_HAVE_LOOPS */
407 bltu a2, a3, .Lzerobit
411 #if !XCHAL_HAVE_LOOPS
414 #endif /* !XCHAL_HAVE_LOOPS */
# Final correction: one more subtract after the loop.
417 bltu a2, a3, .Lreturn
418 sub a2, a2, a3 # subtract once more if dividend >= divisor
423 bltu a2, a3, .Lreturn2
424 sub a2, a2, a3 # subtract once if dividend >= divisor
429 # the divisor is either 0 or 1, so just return 0.
430 # someday we may want to throw an exception if the divisor is 0.
433 .size __umodsi3,.-__umodsi3
435 #endif /* L_umodsi3 */
# __modsi3 — signed 32-bit remainder: a2 = a2 % a3.
# Computes the unsigned remainder of the absolute values, then
# negates the result when the original dividend (saved in a7) was
# negative, so the remainder takes the dividend's sign.
# NOTE(review): entry/return instructions, labels, and parts of the
# loop body are elided in this view; do not edit from this fragment
# alone.
441 .type __modsi3,@function
444 mov a7, a2 # save original (signed) dividend
445 do_abs a2, a2, a4 # udividend = abs(dividend)
446 do_abs a3, a3, a4 # udivisor = abs(divisor)
447 bltui a3, 2, .Lle_one # check if udivisor <= 1
448 do_nsau a5, a2, a6, a8 # udividend_shift = nsau(udividend)
449 do_nsau a4, a3, a6, a8 # udivisor_shift = nsau(udivisor)
450 bgeu a5, a4, .Lspecial
452 sub a4, a4, a5 # count = udivisor_shift - udividend_shift
# sll uses SAR, presumably loaded with `count` on an elided line.
454 sll a3, a3 # udivisor <<= count
456 # test-subtract-and-shift loop
458 loopnez a4, .Lloopend
459 #endif /* XCHAL_HAVE_LOOPS */
461 bltu a2, a3, .Lzerobit
465 #if !XCHAL_HAVE_LOOPS
468 #endif /* !XCHAL_HAVE_LOOPS */
# Final correction, then restore the dividend's sign (the branch
# guarding each neg is on an elided line).
471 bltu a2, a3, .Lreturn
472 sub a2, a2, a3 # subtract once more if udividend >= udivisor
475 neg a2, a2 # if (dividend < 0), return -udividend
480 bltu a2, a3, .Lreturn2
481 sub a2, a2, a3 # subtract once if dividend >= divisor
484 neg a2, a2 # if (dividend < 0), return -udividend
489 # udivisor is either 0 or 1, so just return 0.
490 # someday we may want to throw an exception if udivisor is 0.
493 .size __modsi3,.-__modsi3
495 #endif /* L_modsi3 */