--- /dev/null
+/*
+* 32bit complex vector * complex multiplying implementation. Round at 1bit under LSB twice.
+*
+* function prototype
+* void fr32_vector_complex_mul(
+* const fract32 ar[],
+* const fract32 ai[],
+* const fract32 br[],
+* const fract32 bi[],
+* fract32 cr[],
+* fract32 ci[],
+* int count )
+*
+* parameters
+* FP+32 - int count
+* FP+28 - fr32 ci[]
+* FP+24 - fr32 cr[]
+* FP+20 - const fr32 bi[]
+* FP+16 R2 const fr32 br[]
+* FP+12 R1 const fr32 ai[]
+* FP+ 8 R0 const fr32 ar[]
+*
+* return
+* none
+*
+* side effect
+* cr[], ci[] : receive the output data.
+*
+
+* register layout
+* I0 : ar[]
+* I1 : ai[]
+* I2 : br[]
+* I3 : bi[]
+* P0 : cr[]
+* P1 : ci[]
+* P3 : count : loop counter's initial value
+* R0 : ar[i]
+* R1 : ai[i]
+* R2 : br[i]
+* R3 : bi[i]
+* R6 : a[i]*b[i]
+* R5 : initial value of A0 (0x00010000).
+* R7 : rounding constant.
+*/
+
+ .text
+ .align 4
+ .global _fr32_vector_complex_mul;
+ .type _fr32_vector_complex_mul, STT_FUNC;
+
+_fr32_vector_complex_mul:
+ link 0;
+ [--sp] = (r7:4, p5:3); // save the preserved registers r4-r7 and p3-p5
+
+ /* Set up registers: ar, ai and br arrive in r0-r2, the remaining arguments are read from the stack frame */
+ i0 = r0; // ar
+ i1 = r1; // ai
+ i2 = r2; // br
+ r3 = [fp+20]; // bi
+ i3 = r3;
+ p0 = [fp+24]; // cr
+ p1 = [fp+28]; // ci
+ p3 = [fp+32]; // load count
+ r7 = 0x3FFF(z); // initial value of a1; NOTE(review): 0x3FFF plus the 1 that r5 (0x00010000) contributes after the >>16 totals 0x4000 (bit 14) - presumably the half-LSB rounding bias for the final >>>15; confirm
+ r5 = 0;
+ r5.H = 0x0001; // r5 = 0x00010000, the initial value of a0 in each half of the loop
+
+ /* outer loop: one complex product per pass ("count" is the loop label). NOTE(review): no guard for count == 0 is visible here - confirm callers never pass 0 */
+ loop count lc0 = p3;
+ loop_begin count;
+
+ // a1 starts from r7 and a0 from r5 (see the set-up above)
+ /* real part: ar*br - ai*bi */
+ a1 = r7;
+ a0 = r5;
+ r0 = [i0++] || r2 = [i2++]; // load ar[i] and br[i]
+ r1 = [i1++] || r3 = [i3++]; // load ai[i] and bi[i]
+ a1 += r0.H * r2.L (m), a0 += r0.L * r2.L (fu); // add ar.H*br.L (a1) and ar.L*br.L (a0)
+ a1 -= r1.H * r3.L (m), a0 -= r1.L * r3.L (fu); // subtract ai.H*bi.L (a1) and ai.L*bi.L (a0)
+
+ /* Second half: fold the low*low sum into the cross terms, then combine with the high*high products */
+ a0 = a0 >> 16; // scale down the unsigned integer.
+ a0 += a1;
+ a1 = a0; // a1 = first cross terms + scaled low*low sum, a0 is free for the high*high products
+
+ a0 = r2.H * r0.H, a1 += r2.H * r0.L (m); // br.H*ar.H into a0, add br.H*ar.L to a1
+ a0 -= r3.H * r1.H, a1 -= r3.H * r1.L (m); // subtract bi.H*ai.H from a0 and bi.H*ai.L from a1
+
+ a1 = a1 >>> 15; // scale down the signed integer
+ r6 = (a0 += a1);
+ [p0++] = r6; // store cr[i]
+
+
+
+ // a1 and a0 are re-initialised from r7 and r5; r0-r3 still hold ar[i], ai[i], br[i], bi[i] from the loads above
+ /* imaginary part: ar*bi + ai*br */
+ a1 = r7 ; // restart from the same initial value as the real part
+ a0 = r5;
+ a1 += r0.H * r3.L (m), a0 += r0.L * r3.L (fu); // add ar.H*bi.L (a1) and ar.L*bi.L (a0)
+ a1 += r1.H * r2.L (m), a0 += r1.L * r2.L (fu); // add ai.H*br.L (a1) and ai.L*br.L (a0)
+
+ /* Second half: same folding as in the real part */
+ a0 = a0 >> 16; // scale down the unsigned integer.
+ a0 += a1;
+ a1 = a0;
+
+ a0 = r3.H * r0.H, a1 += r3.H * r0.L (m); // bi.H*ar.H into a0, add bi.H*ar.L to a1
+ a0 += r2.H * r1.H, a1 += r2.H * r1.L (m); // add br.H*ai.H to a0 and br.H*ai.L to a1
+
+ a1 = a1 >>> 15; // scale down the signed integer
+ r6 = (a0 += a1);
+ [p1++] = r6; // store ci[i]
+
+
+ loop_end count;
+ /* end of outer loop */
+
+
+ (r7:4, p5:3) = [sp++]; // restore the preserved registers saved at entry
+ unlink;
+ rts;
+ .size _fr32_vector_complex_mul, .-_fr32_vector_complex_mul
/*
- * \brief complex vector multiplying A*B => C
- * \param ar input vector re(A)
- * \param ai input vector im(A)
- * \param br input vector re(B)
- * \param bi input vector im(B)
- * \param cr input vector re(C)
- * \param ci input vector im(C)
- * \count length of vector
- * \details
- * Mulitiply the complex fxied point vector A by B. Then, store the result to vector C
- */
-void fr32_vector_mult_ccc(
- const fract32 ar[],
- const fract32 ai[],
- const fract32 br[],
- const fract32 bi[],
- fract32 cr[],
- fract32 ci[],
- int count
- )
-{
- int i;
-
- for ( i=0; i<count; i++)
- {
- cr[i] =
- sub_fr1x32(
- mult_fr1x32x32(ar[i], br[i] ),
- mult_fr1x32x32(ai[i], bi[i])
- );
- ci[i] =
- add_fr1x32(
- mult_fr1x32x32(ar[i], bi[i] ), /* sin * cos */
- mult_fr1x32x32(ai[i], br[i]) /* cos * sin */
- );
- }
-}
-
- /*
* \brief complex vector multiplying A*B => C : where A is real.
* \param ar input vector re(A)
* \param ai input vector im(A)
* \details
* Mulitiply the complex fxied point vector A by B. Then, store the result to vector C
*/
-void fr32_vector_mult_ccc(
+void fr32_vector_complex_mul(
const fract32 ar[],
const fract32 ai[],
const fract32 br[],
int count
);
- /**
- * \brief complex vector multiplying A*B => C : where A is real.
- * \param ar input vector A
- * \param br input vector re(B)
- * \param bi input vector im(B)
- * \param cr input vector re(C)
- * \param ci input vector im(C)
- * \count length of vector
- * \details
- * Mulitiply the real vector A by the complex fxied point vector B. Then, store the result to vector C
- */
-void fr32_vector_mult_rcc(
- const fract32 ar[],
- const fract32 br[],
- const fract32 bi[],
- fract32 cr[],
- fract32 ci[],
- int count
- );
-
- /**
- * \brief complex vector multiplying A*B => C : where C is real
- * \param ar input vector re(A)
- * \param ai input vector im(A)
- * \param br input vector re(B)
- * \param bi input vector im(B)
- * \param cr input vector re(C)
- * \count length of vector
- * \details
- * Mulitiply the complex fxied point vector A by B. Then, store the real part of result to vector C
- */
-void fr32_vector_mult_ccr(
- const fract32 ar[],
- const fract32 ai[],
- const fract32 br[],
- const fract32 bi[],
- fract32 cr[],
- int count
- );
#endif /* FX32_VECTOR_H_ */
#include "fx_vector_test.h"
#include <stdio.h>
+
+#define SATP(x) (((x)>2147483647LL)?(2147483647LL):x) /* clamp x from above to 2147483647. NOTE(review): the last x is not parenthesized and x is evaluated twice */
+#define SAT(x) (((x)>=-2147483648LL)?(SATP(x)):-2147483648LL) /* clamp x into [-2147483648, 2147483647]; x is evaluated more than once */
+ /* Reference products for the expected values: 64-bit multiply, add 0x40000000, shift right by 31, then SAT. */
+#define mult_fr1x32x32(x,y) SAT(((long long )(x)*(long long)(y)+0x40000000)>>31)
+#define neg_mult_fr1x32x32(x,y) SAT((-(long long )(x)*(long long)(y)+0x40000000)>>31) /* same as mult_fr1x32x32 but with x negated before the multiply */
+
+
void clearBuffer ( fract32 buf[], int count)
{
int i;
#undef TAPS_06
#undef NUMSAMPLE_06
+
+
+
+/*
+ * Basic test of the complex vector multiplication fr32_vector_complex_mul().
+ */
+#define NUMSAMPLE_07 61
+
+fract32 buf_ar_07[NUMSAMPLE_07]=
+ {
+ 0x00000000, // offset check,
+
+ 0x40008000, // summation test for 4 partial products
+ 0x80000000, // -1*H,
+ 0x80000000, // -1*L
+ 0x10000000, // H*-1
+ 0x00001000, // L*-1
+ 0x00008000, // L*L test 1
+ 0x00008000, // L*L test 2
+ 0x00008000, // L*L test 3
+ 0x00000002, // H*L test 1
+ 0x00000002, // H*L test 2
+ 0x00000002, // H*L test 3
+ 0x40000000,
+ 0x20000000,
+ 0x10000000,
+ 0x00000000, // dummy
+
+ 0x40008000, // summation test for 4 partial products
+ 0x80000000, // -1*H,
+ 0x80000000, // -1*L
+ 0x10000000, // H*-1
+ 0x00001000, // L*-1
+ 0x00008000, // L*L test 1
+ 0x00008000, // L*L test 2
+ 0x00008000, // L*L test 3
+ 0x00000002, // H*L test 1
+ 0x00000002, // H*L test 2
+ 0x00000002, // H*L test 3
+ 0x40000000,
+ 0x20000000,
+ 0x10000000,
+ 0x00000000, // dummy
+
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+
+
+ };
+
+
+fract32 buf_ai_07[NUMSAMPLE_07]=
+ {
+ 0x00000000,
+
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+
+ 0x40008000, // summation test for 4 partial products
+ 0x80000000, // -1*H,
+ 0x80000000, // -1*L
+ 0x10000000, // H*-1
+ 0x00001000, // L*-1
+ 0x00008000, // L*L test 1
+ 0x00008000, // L*L test 2
+ 0x00008000, // L*L test 3
+ 0x00000002, // H*L test 1
+ 0x00000002, // H*L test 2
+ 0x00000002, // H*L test 3
+ 0x40000000,
+ 0x20000000,
+ 0x10000000,
+ 0x00000000, // dummy
+
+ 0x40008000, // summation test for 4 partial products
+ 0x80000000, // -1*H,
+ 0x80000000, // -1*L
+ 0x10000000, // H*-1
+ 0x00001000, // L*-1
+ 0x00008000, // L*L test 1
+ 0x00008000, // L*L test 2
+ 0x00008000, // L*L test 3
+ 0x00000002, // H*L test 1
+ 0x00000002, // H*L test 2
+ 0x00000002, // H*L test 3
+ 0x40000000,
+ 0x20000000,
+ 0x10000000,
+ 0x00000000, // dummy
+
+ };
+
+
+fract32 buf_br_07[NUMSAMPLE_07]=
+ {
+ 0x00000000,
+
+ 0x20008000, //
+ 0x10000000, //
+ 0x00001000,
+ 0x80000000,
+ 0x80000000,
+ 0x00008000,
+ 0x00004000,
+ 0x00002000,
+ 0x40000000,
+ 0x20000000,
+ 0x10000000,
+ 0x00000002, // H*L test 1
+ 0x00000002, // H*L test 2
+ 0x00000002, // H*L test 3
+ 0x00000000,
+
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+
+ 0x20008000, //
+ 0x10000000, //
+ 0x00001000,
+ 0x80000000,
+ 0x80000000,
+ 0x00008000,
+ 0x00004000,
+ 0x00002000,
+ 0x40000000,
+ 0x20000000,
+ 0x10000000,
+ 0x00000002, // H*L test 1
+ 0x00000002, // H*L test 2
+ 0x00000002, // H*L test 3
+ 0x00000000,
+
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+
+
+ };
+
+fract32 buf_bi_07[NUMSAMPLE_07]=
+ {
+ 0x00000000,
+
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+
+ 0x20008000, //
+ 0x10000000, //
+ 0x00001000,
+ 0x80000000,
+ 0x80000000,
+ 0x00008000,
+ 0x00004000,
+ 0x00002000,
+ 0x40000000,
+ 0x20000000,
+ 0x10000000,
+ 0x00000002, // H*L test 1
+ 0x00000002, // H*L test 2
+ 0x00000002, // H*L test 3
+ 0x00000000,
+
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+
+ 0x20008000, //
+ 0x10000000, //
+ 0x00001000,
+ 0x80000000,
+ 0x80000000,
+ 0x00008000,
+ 0x00004000,
+ 0x00002000,
+ 0x40000000,
+ 0x20000000,
+ 0x10000000,
+ 0x00000002, // H*L test 1
+ 0x00000002, // H*L test 2
+ 0x00000002, // H*L test 3
+ 0x00000000,
+
+
+ };
+
+fract32 desired_r_07[NUMSAMPLE_07] ;
+
+fract32 desired_i_07[NUMSAMPLE_07] ;
+
+void test_07_fr32_vector_complex_mul()
+{
+ fract32 output_r[NUMSAMPLE_07], output_i[NUMSAMPLE_07];
+ int i;
+ // Runs fr32_vector_complex_mul() over the buf_*_07 vectors and compares each output element with an expected value computed in C.
+ // Prints "test_07 OK" when everything matches, otherwise reports the first mismatch and returns.
+ // clear output buffer
+ clearBuffer( output_r, NUMSAMPLE_07);
+ clearBuffer( output_i, NUMSAMPLE_07);
+ // expected values from the reference macro: re = ar*br - ai*bi, im = ar*bi + ai*br
+ for ( i=0; i<NUMSAMPLE_07; i++)
+ {
+ desired_r_07[i] = mult_fr1x32x32( buf_ar_07[i], buf_br_07[i])
+ - mult_fr1x32x32( buf_ai_07[i], buf_bi_07[i]);
+ desired_i_07[i] = mult_fr1x32x32( buf_ar_07[i], buf_bi_07[i])
+ + mult_fr1x32x32( buf_ai_07[i], buf_br_07[i]);
+ }
+ // elements 46..60 are the group where only ai and bi are non-zero; their real part is recomputed with neg_mult_fr1x32x32 (negate before round/saturate). NOTE(review): presumably to match the routine's accumulate-then-round order - confirm
+ for ( i=46; i<NUMSAMPLE_07; i++)
+ {
+ desired_r_07[i] = mult_fr1x32x32( buf_ar_07[i], buf_br_07[i])
+ + neg_mult_fr1x32x32(buf_ai_07[i], buf_bi_07[i]);
+ }
+
+ // run the function under test over all NUMSAMPLE_07 elements
+ fr32_vector_complex_mul( buf_ar_07, buf_ai_07, buf_br_07, buf_bi_07,
+ output_r, output_i, NUMSAMPLE_07);
+ // compare real and imaginary outputs element by element; stop at the first mismatch
+ for ( i=0; i<NUMSAMPLE_07; i++)
+ {
+ if ( output_r[i] != desired_r_07[i] )
+ {
+ printf( "test_07 NG :output_r[%2d] = 0x%08X but should be 0x%08X\n", i, output_r[i], desired_r_07[i] );
+ return;
+ }
+ if ( output_i[i] != desired_i_07[i] )
+ {
+ printf( "test_07 NG :output_i[%2d] = 0x%08X but should be 0x%08X\n", i, output_i[i], desired_i_07[i] );
+ return;
+ }
+ }
+ printf ("test_07 OK\n");
+}
+
+#undef TAPS_07
+#undef NUMSAMPLE_07
+