fr32_vector_complex_mul() passed test.

author suikan <suikan@users.sourceforge.jp>

Fri, 21 Feb 2014 22:55:09 +0000 (07:55 +0900)

committer suikan <suikan@users.sourceforge.jp>

Fri, 21 Feb 2014 22:55:09 +0000 (07:55 +0900)
author suikan <suikan@users.sourceforge.jp>
Fri, 21 Feb 2014 22:55:09 +0000 (07:55 +0900)
committer suikan <suikan@users.sourceforge.jp>
Fri, 21 Feb 2014 22:55:09 +0000 (07:55 +0900)
diff --git a/algorithm_vector/Makefile b/algorithm_vector/Makefile

index 80c35ec..adc31b1 100644 (file)
--- a/algorithm_vector/Makefile
+++ b/algorithm_vector/Makefile
@@ -3,7 +3,8 @@ LDFLAGS      = -msim
  CCFLAGS       =  -O0 -g
  LIBS             = -lm
  OBJS          = main.o fx32_vector.o fr32_vector_add.o fx_vector_test.o fr32_vector_add_svv.o \
-                               fr32_vector_sub.o  fr32_vector_sub_svv.o fr32_vector_mul.o fr32_vector_mul_svv.o
+                               fr32_vector_sub.o  fr32_vector_sub_svv.o fr32_vector_mul.o fr32_vector_mul_svv.o \
+                               fr32_vector_complex_mul.o
  
  all:   a.out
  
diff --git a/algorithm_vector/fr32_vector_complex_mul.S b/algorithm_vector/fr32_vector_complex_mul.S

new file mode 100644 (file)

index 0000000..75d4c78
--- /dev/null
+++ b/algorithm_vector/fr32_vector_complex_mul.S
@@ -0,0 +1,122 @@
+/*
+* 32-bit complex vector * complex vector multiplication. Rounds twice at 1 bit below the LSB.
+*
+* function prototype
+* void fr32_vector_complex_mul(
+*        const fract32 ar[],
+*        const fract32 ai[],
+*        const fract32 br[],
+*        const fract32 bi[],
+*        fract32 cr[],
+*        fract32 ci[],
+*        int count );
+*
+* parameters
+*   FP+32      -       int count
+*   FP+28      -       fr32 ci[]
+*   FP+24      -       fr32 cr[]
+*   FP+20      -       const fr32 bi[]
+*   FP+16      R2      const fr32 br[]
+*      FP+12   R1      const fr32 ai[]
+*      FP+ 8   R0      const fr32 ar[]
+*
+* return
+*      none
+*
+* side effect
+*   cr[], ci[] : receive the output data.
+*
+
+* register layout
+*   I0 : ar[]
+*   I1 : ai[]
+*   I2 : br[]
+*   I3 : bi[]
+*   P0 : cr[]
+*   P1 : ci[]
+*      P3 : count : loop counter's initial value
+*   R0 : ar[i]
+*   R1 : ai[i]
+*      R2 : br[i]
+*      R3 : bi[i]
+*      R6 : result (real or imaginary part of a[i]*b[i]) to be stored
+*      R5, R7 : rounding constants.
+*/
+
+       .text
+       .align 4
+       .global _fr32_vector_complex_mul;
+       .type _fr32_vector_complex_mul, STT_FUNC;
+
+_fr32_vector_complex_mul:              // c[i] = a[i] * b[i] for complex fract32 vectors
+       link    0;                      // set up the frame pointer used by the fp+N loads below
+       [--sp] = (r7:4, p5:3);          // save the preserved registers r7:4 and p5:3
+
+               /* Set up registers */
+       i0 = r0;                        // ar
+       i1 = r1;                        // ai
+       i2 = r2;                        // br
+       r3 = [fp+20];           // bi
+       i3 = r3;                // bi pointer goes through r3 into i3
+       p0 = [fp+24];           // cr
+       p1 = [fp+28];           // ci
+       p3 = [fp+32];           // load count
+       r7 = 0x3FFF(z);         // a1 bias; with r5 (1 after >>16) totals 0x4000 = bit 14
+       r5 = 0;                 // r5 = 0x00010000, built in two steps
+       r5.H = 0x0001;          // a0 bias; becomes 1 after the a0 >> 16 in the loop
+               // NOTE(review): count is not checked before the hardware loop - confirm callers pass count >= 1
+               /* per-element loop (lc0 = count) */
+       loop count lc0 = p3;
+       loop_begin count;
+
+                               // r7 and r5 preload the accumulators with the rounding bias
+                               /* real part */
+               a1 = r7;
+               a0 = r5;
+               r0 = [i0++] || r2 = [i2++];                             // load ar[i] and br[i]
+               r1 = [i1++] || r3 = [i3++];                             // load ai[i] and bi[i]
+               a1 += r0.H * r2.L (m), a0 += r0.L * r2.L (fu);          // + ar * br : H*L into a1, L*L into a0
+               a1 -= r1.H * r3.L (m), a0 -= r1.L * r3.L (fu);          // - ai * bi : H*L into a1, L*L into a0
+
+                               /* Second half */
+               a0 = a0 >> 16;                                  // scale down the unsigned integer.
+               a0 += a1;                                       // fold the scaled L*L sum into the H*L sum
+               a1 = a0;
+
+               a0  = r2.H * r0.H, a1 += r2.H * r0.L (m);       // + br * ar : H*H into a0, remaining H*L into a1
+               a0 -= r3.H * r1.H, a1 -= r3.H * r1.L (m);       // - bi * ai : H*H into a0, remaining H*L into a1
+
+               a1 = a1 >>> 15;                         // scale down the signed integer
+               r6 = (a0 += a1);                        // combine both accumulators and extract the result
+               [p0++] = r6;                            // store cr[i]
+
+
+
+                               // r7 and r5 preload the accumulators with the rounding bias
+                               /* imaginary part */
+               a1 = r7 ;                               // a1 bias (see r7 set-up above)
+               a0 = r5;
+               a1 += r0.H * r3.L (m), a0 += r0.L * r3.L (fu);          // + ar * bi : H*L into a1, L*L into a0
+               a1 += r1.H * r2.L (m), a0 += r1.L * r2.L (fu);          // + ai * br : H*L into a1, L*L into a0
+
+                               /* Second half */
+               a0 = a0 >> 16;                                  // scale down the unsigned integer.
+               a0 += a1;                                       // fold the scaled L*L sum into the H*L sum
+               a1 = a0;
+
+               a0  = r3.H * r0.H, a1 += r3.H * r0.L (m);       // + bi * ar : H*H into a0, remaining H*L into a1
+               a0 += r2.H * r1.H, a1 += r2.H * r1.L (m);       // + br * ai : H*H into a0, remaining H*L into a1
+
+               a1 = a1 >>> 15;                         // scale down the signed integer
+               r6 = (a0 += a1);                        // combine both accumulators and extract the result
+               [p1++] = r6;                            // store ci[i]
+
+
+       loop_end count;
+               /* end of per-element loop */
+
+
+       (r7:4, p5:3) = [sp++];          // restore the preserved registers saved on entry
+       unlink;
+       rts;
+       .size   _fr32_vector_complex_mul, .-_fr32_vector_complex_mul
diff --git a/algorithm_vector/fx32_vector.c b/algorithm_vector/fx32_vector.c

index 75587d5..f84e803 100644 (file)
--- a/algorithm_vector/fx32_vector.c
+++ b/algorithm_vector/fx32_vector.c
@@ -13,45 +13,6 @@
  
  
      /*
-     * \brief complex vector multiplying A*B => C
-     * \param ar input vector re(A)
-     * \param ai input vector im(A)
-     * \param br input vector re(B)
-     * \param bi input vector im(B)
-     * \param cr input vector re(C)
-     * \param ci input vector im(C)
-     * \count length of vector
-     * \details
-     * Mulitiply the complex fxied point vector A by B. Then, store the result to vector C
-     */
-void fr32_vector_mult_ccc(
-        const fract32 ar[],
-        const fract32 ai[],
-        const fract32 br[],
-        const fract32 bi[],
-        fract32 cr[],
-        fract32 ci[],
-        int count
-        )
-{
-    int i;
-
-    for ( i=0; i<count; i++)
-    {
-        cr[i] =
-            sub_fr1x32(
-                mult_fr1x32x32(ar[i], br[i] ),
-                mult_fr1x32x32(ai[i], bi[i])
-            );
-        ci[i] =
-            add_fr1x32(
-                mult_fr1x32x32(ar[i], bi[i] ), /* sin * cos */
-                mult_fr1x32x32(ai[i], br[i])     /* cos * sin */
-            );
-    }
-}
-
-    /*
       * \brief complex vector multiplying A*B => C : where A is real.
       * \param ar input vector re(A)
       * \param ai input vector im(A)
diff --git a/algorithm_vector/fx32_vector.h b/algorithm_vector/fx32_vector.h

index ea851a1..4065834 100644 (file)
--- a/algorithm_vector/fx32_vector.h
+++ b/algorithm_vector/fx32_vector.h
@@ -120,7 +120,7 @@ void fr32_vector_mul_svv(
       * \details
       * Mulitiply the complex fxied point vector A by B. Then, store the result to vector C
       */
-void fr32_vector_mult_ccc(
+void fr32_vector_complex_mul(
          const fract32 ar[],
          const fract32 ai[],
          const fract32 br[],
@@ -130,44 +130,5 @@ void fr32_vector_mult_ccc(
          int count
          );
  
-    /**
-     * \brief complex vector multiplying A*B => C : where A is real.
-     * \param ar input vector A
-     * \param br input vector re(B)
-     * \param bi input vector im(B)
-     * \param cr input vector re(C)
-     * \param ci input vector im(C)
-     * \count length of vector
-     * \details
-     * Mulitiply the real vector A by the complex fxied point vector B. Then, store the result to vector C
-     */
-void fr32_vector_mult_rcc(
-        const fract32 ar[],
-        const fract32 br[],
-        const fract32 bi[],
-        fract32 cr[],
-        fract32 ci[],
-        int count
-        );
-
-    /**
-     * \brief complex vector multiplying A*B => C : where C is real
-     * \param ar input vector re(A)
-     * \param ai input vector im(A)
-     * \param br input vector re(B)
-     * \param bi input vector im(B)
-     * \param cr input vector re(C)
-     * \count length of vector
-     * \details
-     * Mulitiply the complex fxied point vector A by B. Then, store the real part of result to vector C
-     */
-void fr32_vector_mult_ccr(
-        const fract32 ar[],
-        const fract32 ai[],
-        const fract32 br[],
-        const fract32 bi[],
-        fract32 cr[],
-        int count
-        );
  
  #endif /* FX32_VECTOR_H_ */
diff --git a/algorithm_vector/fx_vector_test.c b/algorithm_vector/fx_vector_test.c

index e35a88b..68d4867 100644 (file)
--- a/algorithm_vector/fx_vector_test.c
+++ b/algorithm_vector/fx_vector_test.c
@@ -8,6 +8,14 @@
  #include "fx_vector_test.h"
  #include <stdio.h>
  
+/* SATP/SAT clamp x to [-2147483648, 2147483647]; x is expanded more than once, so pass side-effect-free expressions. */
+#define SATP(x)  (((x)>2147483647LL)?(2147483647LL):x)
+#define SAT(x) (((x)>=-2147483648LL)?(SATP(x)):-2147483648LL)
+/* Reference products in long long: add 0x40000000, shift right by 31, saturate. neg_ variant negates x first. */
+#define mult_fr1x32x32(x,y) SAT(((long long )(x)*(long long)(y)+0x40000000)>>31)
+#define neg_mult_fr1x32x32(x,y) SAT((-(long long )(x)*(long long)(y)+0x40000000)>>31)
+
+
  void clearBuffer ( fract32 buf[], int count)
  {
      int i;
@@ -397,3 +405,347 @@ void test_06_fr32_vector_mul_svv()
  
  #undef TAPS_06
  #undef NUMSAMPLE_06
+
+
+
+/*
+ * Complex vector multiply test: element 0 is zero; 1-15 exercise ar*br, 16-30 ar*bi, 31-45 ai*br, 46-60 ai*bi.
+ */
+#define NUMSAMPLE_07 61
+
+fract32 buf_ar_07[NUMSAMPLE_07]=
+    {
+        0x00000000,     // offset check,
+
+        0x40008000,     // summation test for 4 partial products
+        0x80000000,     // -1*H,
+        0x80000000,     // -1*L
+        0x10000000,     // H*-1
+        0x00001000,     // L*-1
+        0x00008000,     // L*L test 1
+        0x00008000,     // L*L test 2
+        0x00008000,     // L*L test 3
+        0x00000002,     // H*L test 1
+        0x00000002,     // H*L test 2
+        0x00000002,     // H*L test 3
+        0x40000000,
+        0x20000000,
+        0x10000000,
+        0x00000000,     // dummy
+
+        0x40008000,     // summation test for 4 partial products
+        0x80000000,     // -1*H,
+        0x80000000,     // -1*L
+        0x10000000,     // H*-1
+        0x00001000,     // L*-1
+        0x00008000,     // L*L test 1
+        0x00008000,     // L*L test 2
+        0x00008000,     // L*L test 3
+        0x00000002,     // H*L test 1
+        0x00000002,     // H*L test 2
+        0x00000002,     // H*L test 3
+        0x40000000,
+        0x20000000,
+        0x10000000,
+        0x00000000,     // dummy
+
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+
+
+    };
+
+
+fract32 buf_ai_07[NUMSAMPLE_07]=
+    {
+        0x00000000,
+
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+
+        0x40008000,     // summation test for 4 partial products
+        0x80000000,     // -1*H,
+        0x80000000,     // -1*L
+        0x10000000,     // H*-1
+        0x00001000,     // L*-1
+        0x00008000,     // L*L test 1
+        0x00008000,     // L*L test 2
+        0x00008000,     // L*L test 3
+        0x00000002,     // H*L test 1
+        0x00000002,     // H*L test 2
+        0x00000002,     // H*L test 3
+        0x40000000,
+        0x20000000,
+        0x10000000,
+        0x00000000,     // dummy
+
+        0x40008000,     // summation test for 4 partial products
+        0x80000000,     // -1*H,
+        0x80000000,     // -1*L
+        0x10000000,     // H*-1
+        0x00001000,     // L*-1
+        0x00008000,     // L*L test 1
+        0x00008000,     // L*L test 2
+        0x00008000,     // L*L test 3
+        0x00000002,     // H*L test 1
+        0x00000002,     // H*L test 2
+        0x00000002,     // H*L test 3
+        0x40000000,
+        0x20000000,
+        0x10000000,
+        0x00000000,     // dummy
+
+    };
+
+
+fract32 buf_br_07[NUMSAMPLE_07]=
+    {
+        0x00000000,
+
+        0x20008000,     //
+        0x10000000,     //
+        0x00001000,
+        0x80000000,
+        0x80000000,
+        0x00008000,
+        0x00004000,
+        0x00002000,
+        0x40000000,
+        0x20000000,
+        0x10000000,
+        0x00000002,     // H*L test 1
+        0x00000002,     // H*L test 2
+        0x00000002,     // H*L test 3
+        0x00000000,
+
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+
+        0x20008000,     //
+        0x10000000,     //
+        0x00001000,
+        0x80000000,
+        0x80000000,
+        0x00008000,
+        0x00004000,
+        0x00002000,
+        0x40000000,
+        0x20000000,
+        0x10000000,
+        0x00000002,     // H*L test 1
+        0x00000002,     // H*L test 2
+        0x00000002,     // H*L test 3
+        0x00000000,
+
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+
+
+    };
+
+fract32 buf_bi_07[NUMSAMPLE_07]=
+    {
+        0x00000000,
+
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+
+        0x20008000,     //
+        0x10000000,     //
+        0x00001000,
+        0x80000000,
+        0x80000000,
+        0x00008000,
+        0x00004000,
+        0x00002000,
+        0x40000000,
+        0x20000000,
+        0x10000000,
+        0x00000002,     // H*L test 1
+        0x00000002,     // H*L test 2
+        0x00000002,     // H*L test 3
+        0x00000000,
+
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+
+        0x20008000,     //
+        0x10000000,     //
+        0x00001000,
+        0x80000000,
+        0x80000000,
+        0x00008000,
+        0x00004000,
+        0x00002000,
+        0x40000000,
+        0x20000000,
+        0x10000000,
+        0x00000002,     // H*L test 1
+        0x00000002,     // H*L test 2
+        0x00000002,     // H*L test 3
+        0x00000000,
+
+
+    };
+
+fract32 desired_r_07[NUMSAMPLE_07] ;
+
+fract32 desired_i_07[NUMSAMPLE_07] ;
+/* Compares fr32_vector_complex_mul() with a long long reference over NUMSAMPLE_07 elements; prints OK or the first mismatch. */
+void test_07_fr32_vector_complex_mul()
+{
+    fract32 output_r[NUMSAMPLE_07], output_i[NUMSAMPLE_07];
+    int i;
+
+
+        // clear output buffer
+    clearBuffer( output_r, NUMSAMPLE_07);
+    clearBuffer( output_i, NUMSAMPLE_07);
+        // reference result: re = ar*br - ai*bi, im = ar*bi + ai*br, each product rounded and saturated on its own
+    for ( i=0; i<NUMSAMPLE_07; i++)
+    {
+        desired_r_07[i] = mult_fr1x32x32( buf_ar_07[i], buf_br_07[i])
+                        - mult_fr1x32x32( buf_ai_07[i], buf_bi_07[i]);
+        desired_i_07[i] = mult_fr1x32x32( buf_ar_07[i], buf_bi_07[i])
+                        + mult_fr1x32x32( buf_ai_07[i], buf_br_07[i]);
+    }
+        // ai*bi group (46..): redo re() with the product negated before rounding - presumably to mirror the assembly; TODO confirm
+    for ( i=46; i<NUMSAMPLE_07; i++)
+    {
+        desired_r_07[i] = mult_fr1x32x32( buf_ar_07[i], buf_br_07[i])
+                        + neg_mult_fr1x32x32(buf_ai_07[i], buf_bi_07[i]);
+    }
+
+        // run the routine under test over the full NUMSAMPLE_07 elements
+    fr32_vector_complex_mul( buf_ar_07, buf_ai_07, buf_br_07, buf_bi_07,
+                                output_r, output_i, NUMSAMPLE_07);
+        // compare real then imaginary output per element; report the first mismatch and stop
+    for ( i=0; i<NUMSAMPLE_07; i++)
+    {
+        if ( output_r[i] != desired_r_07[i] )
+        {
+            printf( "test_07 NG :output_r[%2d] = 0x%08X but should be 0x%08X\n", i, output_r[i], desired_r_07[i] );
+            return;
+        }
+        if ( output_i[i] != desired_i_07[i] )
+        {
+            printf( "test_07 NG :output_i[%2d] = 0x%08X but should be 0x%08X\n", i, output_i[i], desired_i_07[i] );
+            return;
+        }
+    }
+    printf ("test_07 OK\n");
+}
+
+#undef TAPS_07
+#undef NUMSAMPLE_07
+
diff --git a/algorithm_vector/main.c b/algorithm_vector/main.c

index 9a1b082..af6a251 100644 (file)
--- a/algorithm_vector/main.c
+++ b/algorithm_vector/main.c
@@ -23,6 +23,8 @@ int main()
      test_04_fr32_vector_sub_svv();
      test_05_fr32_vector_mul();
      test_06_fr32_vector_mul_svv();
+    test_07_fr32_vector_complex_mul();
+
  
  
      return 0;
author	suikan <suikan@users.sourceforge.jp>
	Fri, 21 Feb 2014 22:55:09 +0000 (07:55 +0900)
committer	suikan <suikan@users.sourceforge.jp>
	Fri, 21 Feb 2014 22:55:09 +0000 (07:55 +0900)
algorithm_vector/Makefile		patch \| blob \| history
algorithm_vector/fr32_vector_complex_mul.S	[new file with mode: 0644]	patch \| blob
algorithm_vector/fx32_vector.c		patch \| blob \| history
algorithm_vector/fx32_vector.h		patch \| blob \| history
algorithm_vector/fx_vector_test.c		patch \| blob \| history
algorithm_vector/main.c		patch \| blob \| history