1 /* Assembly functions for the Xtensa version of libgcc1.
2 Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 In addition to the permissions in the GNU General Public License, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of this file into combinations with other programs,
15 and to distribute those combinations without any restriction coming
16 from the use of this file. (The General Public License restrictions
17 do apply in other respects; for example, they cover modification of
18 the file, and distribution when not linked into a combined executable.)
21 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
22 WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 You should have received a copy of the GNU General Public License
27 along with GCC; see the file COPYING. If not, write to the Free
28 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
31 #include "xtensa-config.h"
33 # Note: These functions use a minimum stack frame size of 32. This is
34 # necessary for Xtensa configurations that only support a fixed register
35 # window size of 8, where even leaf functions (such as these) need to
36 # allocate space for a 4-word "extra save area".
38 # Define macros for the ABS and ADDX* instructions to handle cases
39 # where they are not included in the Xtensa processor configuration.
# do_abs: emulate the ABS instruction (dst = |src|) for Xtensa processor
# configurations that do not include it (see the note above this macro).
# NOTE(review): the macro body is only partially visible in this chunk
# (in-line numbering jumps 41 -> 46); the negation of \src and the final
# move into \dst are presumably on the missing lines -- TODO confirm.
41 .macro do_abs dst, src, tmp
# movgez: \tmp = \src when \src >= 0, i.e. keep the positive value;
# otherwise \tmp keeps whatever it held before (likely -\src, set on a
# missing preceding line -- verify against the full source).
46 movgez \tmp, \src, \src
# do_addx2 / do_addx4 / do_addx8: emulate the ADDX2/ADDX4/ADDX8
# shift-and-add instructions (dst = (as << 1|2|3) + at) for Xtensa
# configurations that omit them, per the comment block above.
# \tmp is scratch for the emulation path.
# NOTE(review): only the .macro header lines survive in this chunk;
# the conditional bodies and .endm directives are on missing lines.
51 .macro do_addx2 dst, as, at, tmp
60 .macro do_addx4 dst, as, at, tmp
69 .macro do_addx8 dst, as, at, tmp
# __mulsi3: 32-bit integer multiply; by libgcc convention the product is
# returned in a2 (a2 = a2 * a3) -- TODO confirm against the missing
# entry/return lines.  Three implementations are selected by the
# preprocessor: MUL16-based, MAC16-based, or the shift-and-add fallback
# shown (in part) below.
# NOTE(review): this chunk is heavily elided (in-line numbering is
# non-contiguous); labels, the #if opener, loop bodies, and the return
# sequence are on missing lines.  Comments describe only visible code.
81 .type __mulsi3,@function
101 #elif XCHAL_HAVE_MAC16
110 #else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
112 # Multiply one bit at a time, but unroll the loop 4x to better
113 # exploit the addx instructions and avoid overhead.
114 # Peel the first iteration to save a cycle on init.
116 # Avoid negative numbers.
117 xor a5, a2, a3 # top bit is 1 iff one of the inputs is negative
121 # Swap so the second argument is smaller.
124 movgez a4, a2, a7 # a4 = max(a2, a3)
125 movltz a3, a2, a7 # a3 = min(a2, a3)
# Unrolled shift-and-add steps: each do_addxN conditionally folds a
# scaled copy of the multiplicand into the running product (the branch
# tests on individual multiplier bits are on missing lines).
131 do_addx2 a7, a4, a2, a7
135 do_addx4 a7, a4, a2, a7
139 do_addx8 a7, a4, a2, a7
# Loop while at least 4 multiplier bits (value >= 16) remain.
143 bgeui a3, 16, .Lmult_main_loop
157 do_addx2 a7, a4, a2, a7
161 do_addx4 a7, a4, a2, a7
165 do_addx8 a7, a4, a2, a7
169 bgeui a3, 16, .Lmult_main_loop
174 #endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
177 .size __mulsi3,.-__mulsi3
179 #endif /* L_mulsi3 */
182 # Define a macro for the NSAU (unsigned normalize shift amount)
183 # instruction, which computes the number of leading zero bits,
184 # to handle cases where it is not included in the Xtensa processor
# (continuation of the comment above is on a missing line:
#  presumably "configuration".)
# do_nsau: cnt = number of leading zero bits in val; \tmp and \a are
# scratch.  NOTE(review): the branches between the extui probes are on
# missing lines; the visible shape suggests it narrows to the highest
# nonzero byte, then finishes with a lookup in __nsau_data -- confirm
# against the full source.
187 .macro do_nsau cnt, val, tmp, a
# Probe the upper halfword (bits 16..31) of \a.
193 extui \tmp, \a, 16, 16
# Probe the top byte (bits 24..31) of \a.
198 extui \tmp, \a, 24, 8
# Base address of the 256-entry leading-zero-count table.
203 movi \tmp, __nsau_data
208 #endif /* !XCHAL_HAVE_NSA */
# __nsau_data: 256-byte lookup table used by do_nsau when the NSA/NSAU
# instructions are absent.  Entry i = number of leading zero bits in
# the 8-bit value i (entry 0 = 8, entry 1 = 7, entries 128..255 = 0).
# NOTE(review): the "__nsau_data:" label line itself is missing from
# this chunk (in-line numbering jumps 215 -> 218).
215 .type __nsau_data,@object
218 .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
219 .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
220 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
221 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
222 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
223 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
224 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
225 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
226 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
227 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
228 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
229 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
230 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
231 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
232 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
233 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
234 #endif /* !XCHAL_HAVE_NSA */
235 .size __nsau_data,.-__nsau_data
# __udivsi3: unsigned 32-bit divide; quotient returned in a2
# (a2 = a2 / a3, per the in-line comments below).
# Visible algorithm: left-align the divisor with the dividend using
# NSAU leading-zero counts, then run a test-subtract-and-shift loop
# that produces one quotient bit per iteration.
# NOTE(review): this chunk is elided -- labels (.Lloopend, .Lzerobit,
# .Lspecial, .Lreturn*, .Lle_one, .Lerror), the #if XCHAL_HAVE_LOOPS
# opener, shift/subtract bodies, and retw lines are missing.
243 .type __udivsi3,@function
246 bltui a3, 2, .Lle_one # check if the divisor <= 1
248 mov a6, a2 # keep dividend in a6
249 do_nsau a5, a6, a2, a7 # dividend_shift = nsau(dividend)
250 do_nsau a4, a3, a2, a7 # divisor_shift = nsau(divisor)
251 bgeu a5, a4, .Lspecial
253 sub a4, a4, a5 # count = divisor_shift - dividend_shift
255 sll a3, a3 # divisor <<= count
256 movi a2, 0 # quotient = 0
258 # test-subtract-and-shift loop; one quotient bit on each iteration
# Zero-overhead hardware loop when the config supports it (matching
# "#if XCHAL_HAVE_LOOPS" line is missing from this chunk).
260 loopnez a4, .Lloopend
261 #endif /* XCHAL_HAVE_LOOPS */
263 bltu a6, a3, .Lzerobit
269 #if !XCHAL_HAVE_LOOPS
272 #endif /* !XCHAL_HAVE_LOOPS */
# Final rounding step after the loop:
275 bltu a6, a3, .Lreturn
276 addi a2, a2, 1 # increment quotient if dividend >= divisor
# .Lspecial path: dividend_shift >= divisor_shift, so the quotient
281 # return dividend >= divisor
283 bltu a6, a3, .Lreturn2
# .Lle_one path: divisor is 0 or 1.
289 beqz a3, .Lerror # if divisor == 1, return the dividend
292 movi a2, 0 # just return 0; could throw an exception
294 .size __udivsi3,.-__udivsi3
296 #endif /* L_udivsi3 */
# __divsi3: signed 32-bit divide; quotient returned in a2
# (a2 = a2 / a3).  Works on absolute values with the same aligned
# shift-subtract loop as __udivsi3, then patches the result sign from
# a7 = dividend ^ divisor (negative iff exactly one operand negative).
# NOTE(review): this chunk is elided -- labels, the #if XCHAL_HAVE_LOOPS
# opener, shift/subtract bodies, and retw lines are on missing lines.
302 .type __divsi3,@function
305 xor a7, a2, a3 # sign = dividend ^ divisor
306 do_abs a6, a2, a4 # udividend = abs(dividend)
307 do_abs a3, a3, a4 # udivisor = abs(divisor)
308 bltui a3, 2, .Lle_one # check if udivisor <= 1
309 do_nsau a5, a6, a2, a8 # udividend_shift = nsau(udividend)
310 do_nsau a4, a3, a2, a8 # udivisor_shift = nsau(udivisor)
311 bgeu a5, a4, .Lspecial
313 sub a4, a4, a5 # count = udivisor_shift - udividend_shift
315 sll a3, a3 # udivisor <<= count
316 movi a2, 0 # quotient = 0
318 # test-subtract-and-shift loop; one quotient bit on each iteration
# Zero-overhead hardware loop (its "#if XCHAL_HAVE_LOOPS" opener is
# missing from this chunk).
320 loopnez a4, .Lloopend
321 #endif /* XCHAL_HAVE_LOOPS */
323 bltu a6, a3, .Lzerobit
329 #if !XCHAL_HAVE_LOOPS
332 #endif /* !XCHAL_HAVE_LOOPS */
# Final rounding step, then apply the sign:
335 bltu a6, a3, .Lreturn
336 addi a2, a2, 1 # increment quotient if udividend >= udivisor
339 movltz a2, a5, a7 # return (sign < 0) ? -quotient : quotient
# .Lspecial path: quotient is 0 or +/-1.
344 bltu a6, a3, .Lreturn2 # if dividend < divisor, return 0
347 movltz a2, a4, a7 # else return (sign < 0) ? -1 : 1
# .Lle_one path: udivisor is 0 or 1.
353 neg a2, a6 # if udivisor == 1, then return...
354 movgez a2, a6, a7 # (sign < 0) ? -udividend : udividend
357 movi a2, 0 # just return 0; could throw an exception
359 .size __divsi3,.-__divsi3
361 #endif /* L_divsi3 */
# __umodsi3: unsigned 32-bit remainder; result returned in a2
# (a2 = a2 % a3).  Same aligned shift-subtract loop as __udivsi3, but
# the running dividend in a2 IS the result -- no quotient accumulator.
# NOTE(review): this chunk is elided -- labels, the #if XCHAL_HAVE_LOOPS
# opener, shift/subtract bodies, and retw lines are on missing lines.
367 .type __umodsi3,@function
370 bltui a3, 2, .Lle_one # check if the divisor is <= 1
372 do_nsau a5, a2, a6, a7 # dividend_shift = nsau(dividend)
373 do_nsau a4, a3, a6, a7 # divisor_shift = nsau(divisor)
374 bgeu a5, a4, .Lspecial
376 sub a4, a4, a5 # count = divisor_shift - dividend_shift
378 sll a3, a3 # divisor <<= count
380 # test-subtract-and-shift loop
# Zero-overhead hardware loop (its "#if XCHAL_HAVE_LOOPS" opener is
# missing from this chunk).
382 loopnez a4, .Lloopend
383 #endif /* XCHAL_HAVE_LOOPS */
385 bltu a2, a3, .Lzerobit
389 #if !XCHAL_HAVE_LOOPS
392 #endif /* !XCHAL_HAVE_LOOPS */
# Final correction after the loop:
395 bltu a2, a3, .Lreturn
396 sub a2, a2, a3 # subtract once more if dividend >= divisor
# .Lspecial path: dividend_shift >= divisor_shift.
401 bltu a2, a3, .Lreturn2
402 sub a2, a2, a3 # subtract once if dividend >= divisor
# .Lle_one path:
407 # the divisor is either 0 or 1, so just return 0.
408 # someday we may want to throw an exception if the divisor is 0.
411 .size __umodsi3,.-__umodsi3
413 #endif /* L_umodsi3 */
# __modsi3: signed 32-bit remainder; result returned in a2
# (a2 = a2 % a3).  Remainder is computed on absolute values; the sign
# of the result follows the sign of the ORIGINAL dividend, which is
# saved in a7 up front (C truncating-division semantics).
# NOTE(review): this chunk is elided -- labels, the #if XCHAL_HAVE_LOOPS
# opener, shift/subtract bodies, the movgez/branch that conditionally
# skips the neg, and retw lines are on missing lines.
419 .type __modsi3,@function
422 mov a7, a2 # save original (signed) dividend
423 do_abs a2, a2, a4 # udividend = abs(dividend)
424 do_abs a3, a3, a4 # udivisor = abs(divisor)
425 bltui a3, 2, .Lle_one # check if udivisor <= 1
426 do_nsau a5, a2, a6, a8 # udividend_shift = nsau(udividend)
427 do_nsau a4, a3, a6, a8 # udivisor_shift = nsau(udivisor)
428 bgeu a5, a4, .Lspecial
430 sub a4, a4, a5 # count = udivisor_shift - udividend_shift
432 sll a3, a3 # udivisor <<= count
434 # test-subtract-and-shift loop
# Zero-overhead hardware loop (its "#if XCHAL_HAVE_LOOPS" opener is
# missing from this chunk).
436 loopnez a4, .Lloopend
437 #endif /* XCHAL_HAVE_LOOPS */
439 bltu a2, a3, .Lzerobit
443 #if !XCHAL_HAVE_LOOPS
446 #endif /* !XCHAL_HAVE_LOOPS */
# Final correction, then sign fixup from the saved dividend:
449 bltu a2, a3, .Lreturn
450 sub a2, a2, a3 # subtract once more if udividend >= udivisor
453 neg a2, a2 # if (dividend < 0), return -udividend
# .Lspecial path:
458 bltu a2, a3, .Lreturn2
459 sub a2, a2, a3 # subtract once if dividend >= divisor
462 neg a2, a2 # if (dividend < 0), return -udividend
# .Lle_one path:
467 # udivisor is either 0 or 1, so just return 0.
468 # someday we may want to throw an exception if udivisor is 0.
471 .size __modsi3,.-__modsi3
473 #endif /* L_modsi3 */