OSDN Git Service

Import UnkoTim224 unkotim224
author Starg <starg@users.osdn.me>
Thu, 14 Feb 2019 15:18:14 +0000 (00:18 +0900)
committer Starg <starg@users.osdn.me>
Thu, 14 Feb 2019 15:18:14 +0000 (00:18 +0900)
12 files changed:
interface/w32g_ini.c
interface/w32g_pref.c
interface/w32g_utl.c
timidity/effect.c
timidity/effect.h
timidity/int_synth.c
timidity/loadsndfontini.h
timidity/readmidi.c
timidity/timidity.c
timidity/voice_effect.c
utils/fft4g.c
utils/fft4g.h

index 02cee28..35be461 100644 (file)
@@ -776,8 +776,6 @@ SaveIniFile(SETTING_PLAYER *sp,  SETTING_TIMIDITY *st)
     IniPutKeyInt(INI_SEC_TIMIDITY,"opt_modulation_update",&(st->opt_modulation_update));
     IniPutKeyInt(INI_SEC_TIMIDITY,"opt_cut_short_time",&st->opt_cut_short_time);
        IniPutKeyInt(INI_SEC_TIMIDITY,"opt_limiter",&st->opt_limiter);
-    if (st->opt_use_midi_loop_repeat)
-       st->opt_use_midi_loop_repeat = 1;
     IniPutKeyInt(INI_SEC_TIMIDITY, "opt_use_midi_loop_repeat", &st->opt_use_midi_loop_repeat);
     IniPutKeyInt(INI_SEC_TIMIDITY, "opt_midi_loop_repeat", &st->opt_midi_loop_repeat);
        
index a5997f9..2041b08 100644 (file)
@@ -1443,9 +1443,9 @@ PrefPlayerDialogProc(HWND hwnd, UINT uMess, WPARAM wParam, LPARAM lParam)
                CB_SET(IDC_COMBO_SECOND_MODE, CB_FIND(cb_info_IDC_COMBO_SECOND_MODE_num, sp_temp->SecondMode, 0));
                
         // CC/Mark loop repeat
-        CH_SET(IDC_CHECKBOX_LOOP_CC111, st_temp->opt_use_midi_loop_repeat & LF_CC111_TO_EOT);
-        CH_SET(IDC_CHECKBOX_LOOP_AB_MARK, st_temp->opt_use_midi_loop_repeat & LF_MARK_A_TO_B);
-        CH_SET(IDC_CHECKBOX_LOOP_SE_MARK, st_temp->opt_use_midi_loop_repeat & LF_MARK_S_TO_E);
+        CH_SET(IDC_CHECKBOX_LOOP_CC111, (st_temp->opt_use_midi_loop_repeat & LF_CC111_TO_EOT) != 0);
+        CH_SET(IDC_CHECKBOX_LOOP_AB_MARK, (st_temp->opt_use_midi_loop_repeat & LF_MARK_A_TO_B) != 0);
+        CH_SET(IDC_CHECKBOX_LOOP_SE_MARK, (st_temp->opt_use_midi_loop_repeat & LF_MARK_S_TO_E) != 0);
         CH_SET(IDC_CHECKBOX_LOOP_CC2, (st_temp->opt_use_midi_loop_repeat & LF_CC2_TO_CC4) != 0);
         SendMessage(hwnd, WM_COMMAND, IDC_CHECKBOX_LOOP_CC111, 0);
         EB_SET_INT(IDC_EDIT_LOOP_REPEAT, st_temp->opt_midi_loop_repeat);
index 654cb0e..9138b83 100644 (file)
@@ -783,8 +783,10 @@ ApplySettingTiMidity(SETTING_TIMIDITY *st)
        opt_mix_envelope = st->opt_mix_envelope;
        opt_modulation_update = st->opt_modulation_update;
        opt_cut_short_time = st->opt_cut_short_time;
-    opt_use_midi_loop_repeat = SetFlag(st->opt_use_midi_loop_repeat);
+#ifdef SUPPORT_LOOPEVENT
+    opt_use_midi_loop_repeat = st->opt_use_midi_loop_repeat;
     opt_midi_loop_repeat = SetValue(st->opt_midi_loop_repeat, 0, 99);
+#endif /* SUPPORT_LOOPEVENT */
        
 #if defined(WINDRV_SETUP) || defined(WINDRV)
        syn_ThreadPriority = st->syn_ThreadPriority;
@@ -1048,8 +1050,10 @@ SaveSettingTiMidity(SETTING_TIMIDITY *st)
        st->add_silent_time = add_silent_time;
        st->emu_delay_time = emu_delay_time;
        st->opt_limiter = opt_limiter;
-    st->opt_use_midi_loop_repeat = SetValue(opt_use_midi_loop_repeat, 0, 1);
+#ifdef SUPPORT_LOOPEVENT
+    st->opt_use_midi_loop_repeat = opt_use_midi_loop_repeat;
     st->opt_midi_loop_repeat = opt_midi_loop_repeat;
+#endif /* SUPPORT_LOOPEVENT */
   
        st->opt_mix_envelope = opt_mix_envelope;
        st->opt_modulation_update = opt_modulation_update;
@@ -1086,7 +1090,26 @@ SaveSettingTiMidity(SETTING_TIMIDITY *st)
        st->opt_int_synth_update = opt_int_synth_update;
 }
 
-
+void
+InitSettingTiMidity(SETTING_TIMIDITY *st) /* Write built-in default values into *st; most of them are also stored in the matching global. */
+{
+    st->voices = voices = DEFAULT_VOICES; /* chained assignment: the global and the settings field receive the same value */
+    st->output_rate = opt_output_rate = DEFAULT_RATE;
+#if defined(TWSYNSRV) || defined(TWSYNG32)
+    st->audio_buffer_bits = opt_audio_buffer_bits = DEFAULT_AUDIO_BUFFER_BITS; /* this group is compiled only when TWSYNSRV or TWSYNG32 is defined */
+    st->opt_reverb_control = opt_reverb_control = 0; /* default off */
+    st->opt_chorus_control = opt_chorus_control = 0; /* default off */
+    st->opt_surround_chorus = opt_surround_chorus = 0; /* default off */
+    st->opt_normal_chorus_plus = opt_normal_chorus_plus = 0; /* default off */
+    st->opt_lpf_def = opt_lpf_def = 0; /* default off */
+    st->noise_sharp_type = noise_sharp_type = 0; /* default off */
+    st->opt_resample_type = opt_resample_type = 0; /* default off */
+#endif /* TWSYNSRV || TWSYNG32 */
+#ifdef SUPPORT_LOOPEVENT
+    st->opt_use_midi_loop_repeat = 0; /* no LF_* loop flag bits set; only *st is written, the global is left untouched */
+    st->opt_midi_loop_repeat = 3; /* only *st is written; ApplySettingTiMidity later clamps this field to 0..99 */
+#endif /* SUPPORT_LOOPEVENT */
+}
 
 
 
@@ -1236,6 +1259,7 @@ void w32g_initialize(void)
 
     SaveSettingPlayer(sp_current);
     SaveSettingTiMidity(st_current);
+    InitSettingTiMidity(st_current);
     if(IniVersionCheck())
     {
        LoadIniFile(sp_current, st_current);
@@ -1453,6 +1477,7 @@ void w32g_initialize(void)
 
     SaveSettingPlayer(sp_current);
     SaveSettingTiMidity(st_current);
+    InitSettingTiMidity(st_current);
     if(IniVersionCheck())
     {
        LoadIniFile(sp_current, st_current);
index 6d72b3b..ed13f5b 100644 (file)
@@ -67,6 +67,7 @@ inialize_effect
 #include "effect.h"
 #include "mt19937ar.h"
 #include "sndfontini.h"
+#include "fft4g.h"
 
 #if defined(__W32__)
 #include <windows.h>
@@ -6529,7 +6530,7 @@ static void do_reverb_ex(DATA_T *buf, int32 count, InfoReverbEX *info)
 
 #define REV_EX2_LEVEL    (1.0) // total
 #define REV_EX2_ST_CROSS (0.3)
-#define REV_EX2_REV_LEVEL (0.5 * (1.0 - REV_EX2_ST_CROSS))
+#define REV_EX2_REV_LEVEL (0.25 * (1.0 - REV_EX2_ST_CROSS))
 
 double ext_reverb_ex2_level = 1.0;
 int ext_reverb_ex2_rsmode = 3;
@@ -6540,11 +6541,9 @@ int ext_reverb_ex2_fftmode = 0;
 static void do_reverb_ex2_thread(int thread_num, void *info2);
 #endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
 
-#if defined(REV_EX2_FFT)
 static void init_reverb_ex2_fft(InfoReverbEX2 *info);
 static void do_reverb_ex2_fft_thread(int thread_num, void *info2);
 static void do_reverb_ex2_fft(DATA_T *buf, int32 count, InfoReverbEX2 *info);
-#endif
 
 
 #define MYINI_LIBRARY_DEFIND_VAR
@@ -6975,7 +6974,7 @@ static void do_reverb_ex2_resample_ov2(float *in0, float *in1, float *out0, floa
 static void do_reverb_ex2_resample_ds2(float *in0, float *in1, float *out0, float *out1, int32 nframe)
 {
        int32 i, k;
-
+       
        for (i = 0, k = 0; i < nframe; i++, k += 2){            
                out0[i] = (in0[k] + in0[k + 1]) * DIV_2;
                out1[i] = (in1[k] + in1[k + 1]) * DIV_2;
@@ -6986,16 +6985,90 @@ static void do_reverb_ex2_resample_ds4(float *in0, float *in1, float *out0, floa
 {
        int32 i, k;
        
+#if (USE_X86_EXT_INTRIN >= 2) // 4samples
+       const __m128 divn = _mm_set1_ps(DIV_4);
+       for (i = 0, k = 0; i < nframe; i += 4, k += 16){
+               __m128  sum1 = _mm_load_ps(&in0[k + 0]); // v0,v1,v2,v3
+               __m128  sum2 = _mm_load_ps(&in0[k + 4]); // v4,v5,v6,v7
+               __m128  sum3 = _mm_load_ps(&in0[k + 8]); // v8,v9,v10,v11
+               __m128  sum4 = _mm_load_ps(&in0[k + 12]); // v12,v13,v14,v15
+               __m128  sum5 = _mm_load_ps(&in1[k + 0]); // v0,v1,v2,v3
+               __m128  sum6 = _mm_load_ps(&in1[k + 4]); // v4,v5,v6,v7
+               __m128  sum7 = _mm_load_ps(&in1[k + 8]); // v8,v9,v10,v11
+               __m128  sum8 = _mm_load_ps(&in1[k + 12]); // v12,v13,v14,v15    
+               //_MM_TRANSPOSE4_PS(sum1, sum2, sum3, sum4)                             
+               __m128 tmp0 = _mm_shuffle_ps(sum1, sum2, 0x44); // v0,v1,v4,v5
+               __m128 tmp2 = _mm_shuffle_ps(sum1, sum2, 0xEE); // v2,v3,v6,v7
+               __m128 tmp1 = _mm_shuffle_ps(sum3, sum4, 0x44); // v8,v9,v12,v13
+               __m128 tmp3 = _mm_shuffle_ps(sum3, sum4, 0xEE); // v10,v11,v14,v5
+               sum1 = _mm_shuffle_ps(tmp0, tmp1, 0x88); // v0,v4,v8,v12
+               sum2 = _mm_shuffle_ps(tmp0, tmp1, 0xDD); // v1,v5,v9,v13
+               sum3 = _mm_shuffle_ps(tmp2, tmp3, 0x88); // v2,v6,10,v15
+               sum4 = _mm_shuffle_ps(tmp2, tmp3, 0xDD); // v3,v7,v11,v16                               
+               //_MM_TRANSPOSE4_PS(sum5, sum6, sum7, sum8)
+               tmp0 = _mm_shuffle_ps(sum5, sum6, 0x44); // v16,....
+               tmp2 = _mm_shuffle_ps(sum5, sum6, 0xEE); // v18,....
+               tmp1 = _mm_shuffle_ps(sum7, sum8, 0x44); // v24,....
+               tmp3 = _mm_shuffle_ps(sum7, sum8, 0xEE); // v26,....
+               sum5 = _mm_shuffle_ps(tmp0, tmp1, 0x88); // v16,....
+               sum6 = _mm_shuffle_ps(tmp0, tmp1, 0xDD); // v17,....
+               sum7 = _mm_shuffle_ps(tmp2, tmp3, 0x88); // v18,....
+               sum8 = _mm_shuffle_ps(tmp2, tmp3, 0xDD); // v19,....
+               sum1 = _mm_add_ps(sum1, sum2);
+               sum3 = _mm_add_ps(sum3, sum4);
+               sum5 = _mm_add_ps(sum5, sum6);
+               sum7 = _mm_add_ps(sum7, sum8);
+               sum1 = _mm_add_ps(sum1, sum3);
+               sum5 = _mm_add_ps(sum5, sum7);
+               sum1 = _mm_mul_ps(sum1, divn);
+               sum5 = _mm_mul_ps(sum5, divn);
+               _mm_store_ps(&out0[i], sum1);
+               _mm_store_ps(&out1[i], sum5);
+       }
+#else
        for (i = 0, k = 0; i < nframe; i++, k += 4){    
                out0[i] = (in0[k] + in0[k + 1] + in0[k + 2] + in0[k + 3]) * DIV_4;
                out1[i] = (in1[k] + in1[k + 1] + in1[k + 1] + in1[k + 3]) * DIV_4;
        }
+#endif
 }
 
 static void do_reverb_ex2_resample_ds8(float *in0, float *in1, float *out0, float *out1, int32 nframe)
 {
        int32 i, k;
-
+       
+#if (USE_X86_EXT_INTRIN >= 2) // 2samples
+       const __m128 divn = _mm_set1_ps(DIV_8);
+       for (i = 0, k = 0; i < nframe; i += 2, k += 16){
+               __m128  vin1 = _mm_load_ps(&in0[k + 0]); // v0,v1,v2,v3
+               __m128  vin2 = _mm_load_ps(&in0[k + 4]); // v4,v5,v6,v7
+               __m128  vin3 = _mm_load_ps(&in0[k + 8]); // v8,v9,v10,v11
+               __m128  vin4 = _mm_load_ps(&in0[k + 12]); // v12,v13,v14,v15
+               __m128  vin5 = _mm_load_ps(&in1[k + 0]); // v0,v1,v2,v3
+               __m128  vin6 = _mm_load_ps(&in1[k + 4]); // v4,v5,v6,v7
+               __m128  vin7 = _mm_load_ps(&in1[k + 8]); // v8,v9,v10,v11
+               __m128  vin8 = _mm_load_ps(&in1[k + 12]); // v12,v13,v14,v15
+               __m128 sum1 = _mm_add_ps(vin1, vin2); // v0v4,v1v5,v2v6,v3v7
+               __m128 sum2 = _mm_add_ps(vin3, vin4); // v8v12,v9v13,v10v14,v11v15
+               __m128 sum3 = _mm_add_ps(vin5, vin6); // v0v4,v1v5,v2v6,v3v7
+               __m128 sum4 = _mm_add_ps(vin7, vin8); // v8v12,v9v13,v10v14,v11v15
+               //_MM_TRANSPOSE4_PS(sum1, sum2, sum3, sum4)                             
+               __m128 tmp0 = _mm_shuffle_ps(sum1, sum2, 0x44); // v0,v1,v4,v5
+               __m128 tmp2 = _mm_shuffle_ps(sum1, sum2, 0xEE); // v2,v3,v6,v7
+               __m128 tmp1 = _mm_shuffle_ps(sum3, sum4, 0x44); // v8,v9,v12,v13
+               __m128 tmp3 = _mm_shuffle_ps(sum3, sum4, 0xEE); // v10,v11,v14,v15
+               sum1 = _mm_shuffle_ps(tmp0, tmp1, 0x88); // v0v4,v8v12,v0v4,v8v12
+               sum2 = _mm_shuffle_ps(tmp0, tmp1, 0xDD); // v1v5,v9v13,v1v5,v9v13
+               sum3 = _mm_shuffle_ps(tmp2, tmp3, 0x88); // v2v6,v10v15,v2v6,v10v15
+               sum4 = _mm_shuffle_ps(tmp2, tmp3, 0xDD); // v3v7,v11v16,v3v7,v11v16                     
+               sum1 = _mm_add_ps(sum1, sum2);
+               sum3 = _mm_add_ps(sum3, sum4);
+               sum1 = _mm_add_ps(sum1, sum3);
+               sum1 = _mm_mul_ps(sum1, divn);
+               _mm_storel_pi((__m64*)&out0[i], sum1);
+               _mm_storeh_pi((__m64*)&out1[i], sum1);
+       }
+#else
        for (i = 0, k = 0; i < nframe; i++, k += 8){    
                out0[i] = (
                        in0[k    ] + in0[k + 1] + in0[k + 2] + in0[k + 3] + 
@@ -7004,6 +7077,7 @@ static void do_reverb_ex2_resample_ds8(float *in0, float *in1, float *out0, floa
                        in1[k    ] + in1[k + 1] + in1[k + 1] + in1[k + 3] +
                        in1[k + 4] + in1[k + 5] + in1[k + 6] + in1[k + 7]) * DIV_8;
        }
+#endif
 }
 
 void free_reverb_ex2(InfoReverbEX2 *info)
@@ -7021,8 +7095,6 @@ void free_reverb_ex2(InfoReverbEX2 *info)
                if(info->tbuf[i] != NULL){ safe_free(info->tbuf[i]); info->tbuf[i] = NULL; }    
 #endif // USE_X86_EXT_INTRIN
        }
-
-#if defined(REV_EX2_FFT)
        for(i = 0; i < 2; i++){
 #if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
                if(info->rvs[i] != NULL){ aligned_free(info->rvs[i]); info->rvs[i] = NULL; }
@@ -7034,6 +7106,8 @@ void free_reverb_ex2(InfoReverbEX2 *info)
                if(info->fi[i] != NULL){ aligned_free(info->fi[i]); info->fi[i] = NULL; }
                if(info->bd[i] != NULL){ aligned_free(info->bd[i]); info->bd[i] = NULL; }       
                if(info->ios[i] != NULL){ aligned_free(info->ios[i]); info->ios[i] = NULL; }
+               if(info->fftw[i] != NULL){ aligned_free(info->fftw[i]); info->fftw[i] = NULL; }
+               if(info->ffti[i] != NULL){ aligned_free(info->ffti[i]); info->ffti[i] = NULL; }
 #else
                if(info->rvs[i] != NULL){ safe_freeinfo->rvs[i]); info->rvs[i] = NULL; }
                if(info->rs[i] != NULL){ safe_freeinfo->rs[i]); info->rs[i] = NULL; }
@@ -7044,15 +7118,10 @@ void free_reverb_ex2(InfoReverbEX2 *info)
                if(info->fi[i] != NULL){ safe_freeinfo->fi[i]); info->fi[i] = NULL; }
                if(info->bd[i] != NULL){ safe_freeinfo->bd[i]); info->bd[i] = NULL; }   
                if(info->ios[i] != NULL){ safe_freeinfo->ios[i]); info->ios[i] = NULL; }        
+               if(info->fftw[i] != NULL){ safe_freeinfo->fftw[i]); info->fftw[i] = NULL; }
+               if(info->ffti[i] != NULL){ safe_freeinfo->ffti[i]); info->ffti[i] = NULL; }
 #endif // USE_X86_EXT_INTRIN
        }
-#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
-       if(info->sint != NULL){ aligned_free(info->sint); info->sint = NULL; }
-#else
-       if(info->sint != NULL){ safe_free(info->sint); info->sint = NULL; }
-#endif // USE_X86_EXT_INTRIN
-#endif // defined(REV_EX2_FFT)
-
 #if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)        
        reset_effect_sub_thread(do_reverb_ex2_thread, info);
        info->thread = 0;
@@ -7072,7 +7141,6 @@ static void init_reverb_ex2(InfoReverbEX2 *info)
        int32 amp = 100;
        TCHAR path[FILEPATH_MAX] = {0};
        
-#if defined(REV_EX2_FFT)
        if(ext_reverb_ex2_fftmode){
                init_reverb_ex2_fft(info);
                return;
@@ -7080,7 +7148,7 @@ static void init_reverb_ex2(InfoReverbEX2 *info)
        if(info->fftmode)
                free_reverb_ex2(info);
        info->fftmode = 0;
-#endif
+
        if(info->init){
                if(info->pmr_p != play_mode->rate || info->rt_p != info->revtype)
                        free_reverb_ex2(info);
@@ -7941,12 +8009,10 @@ static void do_reverb_ex2_thread(int thread_num, void *info2)
        info = (InfoReverbEX2 *)info2;
        if(!info->init)
                return;
-#if defined(REV_EX2_FFT)
        if(info->fftmode){
                do_reverb_ex2_fft_thread(thread_num, info2);
                return;
        }
-#endif
        if(thread_num >= (info->thread + info->ithread))
                return; 
        if(info->ithread){
@@ -7977,12 +8043,10 @@ static void do_reverb_ex2(DATA_T *buf, int32 count, InfoReverbEX2 *info)
                return; 
        else if(!info->init)
                return;
-#if defined(REV_EX2_FFT)
        if(info->fftmode){
                do_reverb_ex2_fft(buf, count, info);
                return;
        }
-#endif
        info->ptr = buf;
        info->count = count;
        info->tcount = count >> (1 + info->rsmode);     
@@ -8001,161 +8065,90 @@ static void do_reverb_ex2(DATA_T *buf, int32 count, InfoReverbEX2 *info)
 }
 
 
-#if defined(REV_EX2_FFT)
-// freeverb3 irmodel2zl.cpp を参考に書いてみたが・・
+// REV_EX2_FFT
+// freeverb3 irmodel2zl.cpp irmodel2.cpp を参考に
 
-
-#define REV_EX2_FFT_LEVEL (1. * (1.0 - REV_EX2_ST_CROSS))
-#define REV_EX2_FRAGBIT (12) // 10 ~ 14 
+#define REV_EX2_FFT_LEVEL (0.25 * (1.0 - REV_EX2_ST_CROSS))
+#define REV_EX2_FRAGBIT (10) // 10 ~ 14 
 #define REV_EX2_FRAGSIZE (1 << REV_EX2_FRAGBIT) // 2^REV_EX2_FRAGBIT > synthbuffer size
 #define REV_EX2_FFTSIZE (REV_EX2_FRAGSIZE << 1)
 
-static void do_reverb_ex2__fft(float *fft, float *st)
+static void do_reverb_ex2_rdft(float *fft, int d, int *ip, float *w) /* In-place transform of fft[] with fixed length REV_EX2_FFTSIZE; d, ip and w are forwarded unchanged to rdft()/rdft_simd(). Callers in this file pass d = 1 (R2HC) or d = -1 (HC2R). */
 {      
-       const int32 cosofs = REV_EX2_FRAGSIZE >> 2;
-       const uint32 stmask = REV_EX2_FRAGSIZE - 1;
-       float *ar = fft;
-       float *ai = fft + REV_EX2_FRAGSIZE;
-       int i = 0, j, k, m, mh, irev;
-       float xr, xi;
-
-       for (j = 1; j < (REV_EX2_FRAGSIZE - 1); j++){
-               for(k = (REV_EX2_FRAGSIZE >> 1); k > (i ^= k); k >>= 1){}
-               if(j < i){
-                       xr = *(ar + j);
-                       xi = *(ai + j);
-                       *(ar + j) = *(ar + i);
-                       *(ai + j) = *(ai + i);
-                       *(ar + i) = xr;
-                       *(ai + i) = xi;
-               }
-       }
-       for(mh = 1; (m = mh << 1) <= REV_EX2_FRAGSIZE; mh = m){
-               irev = 0;
-               for(i = 0; i < REV_EX2_FRAGSIZE; i += m){
-                       float tsin = st[irev & stmask];
-                       float tcos = st[(irev + cosofs) & stmask];
-                       for(k = (REV_EX2_FRAGSIZE >> 2); k > (irev ^= k); k >>= 1){}
-                       for(j = i; j < mh + i; j++){
-                               k = j + mh;
-                               xr = *(ar + j) - *(ar + k);
-                               xi = *(ai + j) - *(ai + k);
-                               *(ar + j) += *(ar + k);
-                               *(ai + j) += *(ai + k);
-                               *(ar + k) = tcos * xr - tsin * xi;
-                               *(ai + k) = tcos * xi + tsin * xr;
-                       }
-               }
-       }
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       rdft_simd(REV_EX2_FFTSIZE, d, fft, ip, w); /* SIMD build; NOTE(review): presumably declared in fft4g.h with the same contract as rdft() - confirm */
+#else
+       rdft(REV_EX2_FFTSIZE, d, fft, ip, w); /* NOTE(review): assumes the Ooura fft4g convention (ip = work area, w = cos/sin table, built while ip[0] == 0) - confirm against utils/fft4g.c */
+#endif
 }
 
-static void do_reverb_ex2_R2HC(float *iL, float *oL, float *st)
+static void do_reverb_ex2_R2HC(float *iL, float *oL, int *ip, float *w) /* Transform REV_EX2_FRAGSIZE input floats (zero-padded to REV_EX2_FFTSIZE) and store the result de-interleaved in oL; ip and w are handed to do_reverb_ex2_rdft(). */
 {      
        const int32 fbyte = sizeof(float) * REV_EX2_FRAGSIZE;
-       int32 i = 0;
+       const int32 ribyte = sizeof(float) * REV_EX2_FFTSIZE; /* not referenced anywhere in this function */
+       int32 i, k; /* k is only used by the SIMD branch below */
        ALIGN float fo[REV_EX2_FFTSIZE] = {0};
+       float *or = oL; /* first half of oL: receives the even-indexed elements of fo */
+       float *oi = oL + REV_EX2_FRAGSIZE; /* second half of oL: receives the odd-indexed elements of fo */
 
        memcpy(fo, iL, fbyte);
-       do_reverb_ex2__fft(fo, st);
-#if 0
-       for(i = 0; i < REV_EX2_FRAGSIZE; i++){
-               oL[i        ] = fo[i]; 
-               oL[REV_EX2_FFTSIZE - 1 - 1] = fo[REV_EX2_FFTSIZE - 1 - i];
-       }
-#elif 0
-    oL[0] = fo[0]; 
-       oL[1] = fo[REV_EX2_FRAGSIZE];
-       for(i = 1; i < REV_EX2_FRAGSIZE; i++){
-               oL[2 * i    ] = fo[REV_EX2_FFTSIZE - i]; 
-               oL[2 * i + 1] = fo[REV_EX2_FFTSIZE - i];
+       do_reverb_ex2_rdft(fo, 1, ip, w); /* d = 1; only the first half of fo holds input, the rest is still zero from the initializer */
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       for(i = 0, k = 0; i < REV_EX2_FRAGSIZE; i += 4, k += 8){ /* 8 interleaved floats in, 4 + 4 split floats out per iteration */
+               __m128 vin0 = _mm_load_ps(&fo[k]); // [0123]
+               __m128 vin1 = _mm_load_ps(&fo[k + 4]); // [4567]
+               vin0 = _mm_shuffle_ps(vin0, vin0, 0xD8); // [0213]
+               vin1 = _mm_shuffle_ps(vin1, vin1, 0xD8); // [4657]
+               _mm_store_ps(&or[i], _mm_shuffle_ps(vin0, vin1, 0x44)); // [0246]
+               _mm_store_ps(&oi[i], _mm_shuffle_ps(vin0, vin1, 0xEE)); // [1357]
        }
-#elif 0
+#else
        for(i = 0; i < REV_EX2_FRAGSIZE; i++){
-               oL[2 * i    ] = fo[i]; 
-               oL[2 * i + 1] = fo[REV_EX2_FFTSIZE - 1 - i];
+               or[i] = fo[2 * i    ]; /* NOTE(review): presumably the real part under fft4g packing - confirm */
+               oi[i] = fo[2 * i + 1]; /* NOTE(review): presumably the imaginary part (index 0 would then be the Nyquist term) - confirm */
        }
-#else
-       memcpy(oL, fo, sizeof(float) * REV_EX2_FFTSIZE);
 #endif
-
 }
 
-static void do_reverb_ex2_HC2R(float *iL, float *oL, float *st)
+static void do_reverb_ex2_HC2R(float *iL, float *oL, int *ip, float *w) /* Inverse of the R2HC layout step: re-interleave the two halves of iL, transform with d = -1, then ADD the REV_EX2_FFTSIZE results onto oL (oL is accumulated into, not overwritten). */
 {
-       int32 i = 0;
+       const int32 ribyte = sizeof(float) * REV_EX2_FFTSIZE; /* not referenced anywhere in this function */
+       int32 i, k; /* k is only used by the SIMD interleave branch below */
        ALIGN float fo[REV_EX2_FFTSIZE] = {0};
+       float *ir = iL; /* first half of iL: goes to the even indices of fo */
+       float *ii = iL + REV_EX2_FRAGSIZE; /* second half of iL: goes to the odd indices of fo */
        
-#if 0
-       for(i = 0; i < REV_EX2_FRAGSIZE; i++){
-               fo[i] = iL[i        ]; 
-               fo[REV_EX2_FFTSIZE - 1 - 1] = iL[REV_EX2_FFTSIZE - 1 - i];
-       }
-
-#elif 0
-       fo[0] = iL[0];
-       fo[REV_EX2_FRAGSIZE] = iL[1];
-       for(i = 1; i < REV_EX2_FRAGSIZE; i++){
-               fo[REV_EX2_FFTSIZE - i] = iL[2 * i    ];
-               fo[REV_EX2_FFTSIZE - i] = iL[2 * i + 1];
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       for(i = 0, k = 0; i < REV_EX2_FRAGSIZE; i += 4, k += 8){ /* 4 + 4 split floats in, 8 interleaved floats out per iteration */
+               __m128 vin0 = _mm_load_ps(&ir[i]); // [0246]
+               __m128 vin1 = _mm_load_ps(&ii[i]); // [1357]
+               __m128 vt0 = _mm_shuffle_ps(vin0, vin1, 0x44); // [0213]
+               __m128 vt1 = _mm_shuffle_ps(vin0, vin1, 0xEE); // [4657]                
+               _mm_store_ps(&fo[k    ], _mm_shuffle_ps(vt0, vt0, 0xD8)); // [0123]
+               _mm_store_ps(&fo[k + 4], _mm_shuffle_ps(vt1, vt1, 0xD8)); // [4567]
        }
-#elif 0
+#else
        for(i = 0; i < REV_EX2_FRAGSIZE; i++){
-               fo[i                      ] = iL[2 * i    ]; 
-               fo[REV_EX2_FFTSIZE - 1 - i] = iL[2 * i + 1];
+               fo[2 * i    ] = ir[i]; 
+               fo[2 * i + 1] = ii[i];
        }
+#endif
+       do_reverb_ex2_rdft(fo, -1, ip, w); /* d = -1; no scaling is applied to fo in this function afterwards */
+#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       for(i = 0; i < REV_EX2_FFTSIZE; i += 8) /* 8 floats per step */
+               MM256_LS_ADD_PS(&oL[i], _mm256_load_ps(&fo[i])); /* NOTE(review): project macro, presumably load-add-store equivalent to oL[i..i+7] += fo[i..i+7] - confirm */
+#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       for(i = 0; i < REV_EX2_FFTSIZE; i += 4) /* 4 floats per step */
+               MM_LS_ADD_PS(&oL[i], _mm_load_ps(&fo[i])); /* NOTE(review): project macro, presumably load-add-store equivalent to oL[i..i+3] += fo[i..i+3] - confirm */
 #else
-       memcpy(fo, iL, sizeof(float) * REV_EX2_FFTSIZE);
+       for(i = 0; i < REV_EX2_FFTSIZE; i++)
+               oL[i] += fo[i]; /* accumulate into the caller's buffer */
 #endif
-       do_reverb_ex2__fft(fo, st);
-       {
-               float *pfor = fo;
-               float *pfoi = fo + REV_EX2_FRAGSIZE;
-               float *or = oL;
-               float *oi = oL + REV_EX2_FRAGSIZE;
-               const float divfr = 1.0;// / (double)REV_EX2_FRAGSIZE;
-               const float divfi = -1.0;// / (double)REV_EX2_FRAGSIZE;
-               for(i = 0; i < REV_EX2_FFTSIZE; i++){
-               //      or[i] += pfor[i] * divfi;
-                       or[REV_EX2_FFTSIZE - 1 - i] += pfor[i] * divfr;
-               //      oi[i] += pfoi[i] * divfi;
-               }
-       }
 }
 
 static void do_reverb_ex2_mul(float *iL, float *fL, float *oL)
 {
        int32 i;
-#if 0
-       float tL0 = oL[0] + iL[0] * fL[0];
-       float tL1 = oL[1] + iL[1] * fL[1];
-#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
-       const __m128 vm1 = _mm_set_ps(1, -1, 1, -1);
-       for(i = 0; i < REV_EX2_FRAGSIZE; i += 2){
-               __m128 vo = _mm_load_ps(&oL[2 * i]);
-               __m128 vi = _mm_loadu_ps(&iL[2 * i]);
-               __m128 vf0 = _mm_load_ps(&fL[2 * i]);
-               __m128 vf1 = _mm_shuffle_ps(vf0, vf0, 0xB1);
-               __m128 vi0 = _mm_shuffle_ps(vi, vi, 0xA0);
-               __m128 vi1 = _mm_shuffle_ps(vi, vi, 0xF5);      
-               vf1 = _mm_mul_ps(vf1, vm1);
-               vo = MM_FMA_PS(vi0, vf0, vo);
-               vo = MM_FMA_PS(vi1, vf1, vo);
-               _mm_store_ps(&oL[2 * i], vo);   
-       }
-#else
-       for(i = 0; i < REV_EX2_FRAGSIZE; i++){
-               float i0 = iL[2 * i + 0];
-               float i1 = iL[2 * i + 1];
-               float f0 = fL[2 * i + 0];
-               float f1 = fL[2 * i + 1];
-               oL[2 * i + 0] += i0 * f0 - i1 * f1;
-               oL[2 * i + 1] += i0 * f1 + i1 * f0;             
-       }
-#endif
-       oL[0] = tL0;
-       oL[1] = tL1;
-
-#else  
        float *ir = iL;
        float *ii = iL + REV_EX2_FRAGSIZE;
        float *fr = fL;
@@ -8165,7 +8158,24 @@ static void do_reverb_ex2_mul(float *iL, float *fL, float *oL)
        float tor = or[0] + ir[0] * fr[0];
        float toi = oi[0] + ii[0] * fi[0];
 
-#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+#if (USE_X86_EXT_INTRIN >= 8) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       const __m256 vm1 = _mm256_set1_ps(-1);
+       for(i = 0; i < REV_EX2_FRAGSIZE; i += 8){
+               __m256 vir = _mm256_load_ps(&ir[i]);
+               __m256 vii = _mm256_load_ps(&ii[i]);
+               __m256 vfr = _mm256_load_ps(&fr[i]);
+               __m256 vfi = _mm256_load_ps(&fi[i]);
+               __m256 vor = _mm256_load_ps(&or[i]);
+               __m256 voi = _mm256_load_ps(&oi[i]);
+               __m256 vfm = _mm256_mul_ps(vfi, vm1);
+               vor = MM256_FMA_PS(vir, vfr , vor);
+               vor = MM256_FMA_PS(vii, vfm , vor);
+               voi = MM256_FMA_PS(vir, vfi, voi);
+               voi = MM256_FMA_PS(vii, vfr, voi);
+               _mm256_store_ps(&or[i], vor);
+               _mm256_store_ps(&oi[i], voi);
+       }
+#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
        const __m128 vm1 = _mm_set1_ps(-1);
        for(i = 0; i < REV_EX2_FRAGSIZE; i += 4){
                __m128 vir = _mm_load_ps(&ir[i]);
@@ -8194,8 +8204,6 @@ static void do_reverb_ex2_mul(float *iL, float *fL, float *oL)
 #endif
        or[0] = tor;
        oi[0] = toi;
-
-#endif
 }
 
 static float* do_reverb_ex2_delay(float *in, int32 prev, float *dbuf, int32 *bcount, int32 bnum)
@@ -8312,7 +8320,7 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info)
 #else
                ndata[0] = (float *) safe_large_malloc(nbytes);
                ndata[1] = (float *) safe_large_malloc(nbytes);
-#endif         
+#endif         fnum
                if(!ndata[0] || !ndata[0])
                        goto error;
                memset(ndata[0], 0, nbytes);
@@ -8357,7 +8365,7 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info)
                        if(info->irdata[i] != NULL){ safe_free(info->irdata[i]); info->irdata[i] = NULL; }
 #endif         
                }
-               info->frame = nframe; 
+               info->frame = tframe; 
                info->srate = rsrate;
                info->irdata[0] = ndata[0];
                info->irdata[1] = ndata[1];
@@ -8396,7 +8404,7 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info)
        // create buffers
        fnum = info->frame / REV_EX2_FRAGSIZE;
        bytes = sizeof(float) * (REV_EX2_FRAGSIZE + 8);
-       ibytes = sizeof(int32) * REV_EX2_FRAGSIZE;
+       ibytes = sizeof(int) * (REV_EX2_FRAGSIZE + 8);
        for(i = 0; i < 2; i++){
 #if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
                info->rvs[i] = (float *) aligned_malloc(bytes * 2, ALIGN_SIZE);
@@ -8408,6 +8416,8 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info)
                info->fi[i] = (float *) aligned_malloc(bytes * 2 * fnum, ALIGN_SIZE);
                info->bd[i] = (float *) aligned_malloc(bytes * 2 * fnum, ALIGN_SIZE);
                info->ios[i] = (float *) aligned_malloc(bytes * 3, ALIGN_SIZE);
+               info->fftw[i] = (float *) aligned_malloc(bytes * 2, ALIGN_SIZE);
+               info->ffti[i] = (int *) aligned_malloc(ibytes * 2, ALIGN_SIZE);
 #else
                info->rvs[i] = (float *) safe_large_malloc(bytes * 2);
                info->rs[i] = (float *) safe_large_malloc(bytes * 2);
@@ -8418,9 +8428,12 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info)
                info->ios[i] = (float *) safe_large_malloc(bytes * 3);
                info->fi[i] = (float *) safe_large_malloc(bytes * 2 * fnum);
                info->bd[i] = (float *) safe_large_malloc(bytes * 2 * fnum);
+               info->fftw[i] = (float *) safe_large_malloc(bytes * 2);
+               info->ffti[i] = (int *) safe_large_malloc(ibytes * 2);
 #endif
                if(!info->rvs[i] || !info->rs[i] || !info->is[i] || !info->ss[i] || !info->os[i]
-                       || !info->fs[i] || !info->fi[i] || !info->bd[i] || !info->ios[i] 
+                       || !info->fs[i] || !info->fi[i] || !info->bd[i] || !info->ios[i]
+                       || !info->fftw[i] || !info->ffti[i]
                        ){
                        goto error;
                }
@@ -8433,26 +8446,15 @@ static void init_reverb_ex2_fft(InfoReverbEX2 *info)
                memset(info->fi[i], 0, bytes * 2 * fnum);
                memset(info->bd[i], 0, bytes * 2 * fnum);
                memset(info->ios[i], 0, bytes * 3);
-       }
-       // sin table
-#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
-       info->sint = (float *) aligned_malloc(bytes, ALIGN_SIZE);
-#else
-       info->sint = (float *) safe_large_malloc(bytes);
-#endif
-       if(!info->sint)
-               goto error;
-       memset(info->sint, 0, bytes);
-       for(i = 0; i < REV_EX2_FRAGSIZE; i++){ 
-               const double rad = M_PI * 2.0 / REV_EX2_FRAGSIZE;
-               info->sint[i] = (float)((double)sin(rad * i));
+               memset(info->fftw[i], 0, bytes * 2);
+               memset(info->ffti[i], 0, ibytes * 2);
        }
        // impulse
        for(i = 0; i < fnum; i++){
                int32 fofs = REV_EX2_FRAGSIZE * i;
                int32 riofs = REV_EX2_FFTSIZE * i;
-               do_reverb_ex2_R2HC(info->irdata[0] + fofs, info->fi[0] + riofs, info->sint);    
-               do_reverb_ex2_R2HC(info->irdata[1] + fofs, info->fi[1] + riofs, info->sint);    
+               do_reverb_ex2_R2HC(info->irdata[0] + fofs, info->fi[0] + riofs, info->ffti[0], info->fftw[0]);  
+               do_reverb_ex2_R2HC(info->irdata[1] + fofs, info->fi[1] + riofs, info->ffti[1], info->fftw[1]);  
        }
        info->fnum = fnum;
        // create input/output buffers  
@@ -8517,17 +8519,17 @@ static void do_reverb_ex2_fft_process1(int32 ofs, int32 count, int32 ch, InfoRev
                memset(info->ss[ch], 0, ribyte);                
                memset(info->rvs[ch] + REV_EX2_FRAGSIZE - 1, 0, sizeof(float) * (REV_EX2_FRAGSIZE + 1));
                for(i = 1; i < info->fnum; i++){
-                       float *bd0 = do_reverb_ex2_delay(info->is[ch], i - 1, info->bd[ch], &info->bdcount[ch], info->fnum);
-                       do_reverb_ex2_mul(bd0, info->fi[ch] + REV_EX2_FFTSIZE * i, info->ss[ch]);
+                       float *bd = do_reverb_ex2_delay(info->is[ch], i - 1, info->bd[ch], &info->bdcount[ch], info->fnum);
+                       do_reverb_ex2_mul(bd, info->fi[ch] + REV_EX2_FFTSIZE * i, info->ss[ch]);
                }
        }
-       memset(info->os[ch], 0, fbyte); 
+       memset(info->os[ch], 0, f2byte);        
        memcpy(info->fs[ch] + info->scount[ch], input, cbyte);
        memcpy(info->os[ch] + info->scount[ch], input, cbyte);
-       do_reverb_ex2_R2HC(info->os[ch], info->is[ch], info->sint);             
+       do_reverb_ex2_R2HC(info->os[ch], info->is[ch], info->ffti[ch], info->fftw[ch]);         
     do_reverb_ex2_mul(info->is[ch], info->fi[ch], info->ss[ch]);
-       memset(info->rvs[ch], 0, fbyte);
-       do_reverb_ex2_HC2R(info->ss[ch], info->rvs[ch], info->sint);
+       memset(info->rvs[ch], 0, f2byte);
+       do_reverb_ex2_HC2R(info->ss[ch], info->rvs[ch], info->ffti[ch], info->fftw[ch]);
        rvsc = info->rvs[ch] + info->scount[ch];
        rsc = info->rs[ch] + info->scount[ch];
 #if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
@@ -8539,8 +8541,7 @@ static void do_reverb_ex2_fft_process1(int32 ofs, int32 count, int32 ch, InfoRev
 #endif
        info->scount[ch] += count;
        if(info->scount[ch] == REV_EX2_FRAGSIZE){
-               const int32 fbyte2 = sizeof(float) * (REV_EX2_FRAGSIZE - 1);
-               do_reverb_ex2_R2HC(info->fs[ch], info->is[ch], info->sint);  
+               do_reverb_ex2_R2HC(info->fs[ch], info->is[ch], info->ffti[ch], info->fftw[ch]);  
                memcpy(info->rs[ch], info->rvs[ch] + REV_EX2_FRAGSIZE, sizeof(float) * (REV_EX2_FRAGSIZE - 1));         
                info->scount[ch] = 0;
        }
@@ -8556,24 +8557,26 @@ static void do_reverb_ex2_fft_process2(int32 count, int32 ch, InfoReverbEX2 *inf
        const int32 f2byte = sizeof(float) * REV_EX2_FRAGSIZE * 2;
        const int32 cbyte = sizeof(float) * count; 
        const int32 ribyte = sizeof(float) * REV_EX2_FFTSIZE;
-       
-       memcpy(info->ios[ch] + info->scount[ch] + REV_EX2_FRAGSIZE, input, cbyte);
+       float *iobuf = info->ios[ch] + REV_EX2_FRAGSIZE;
+
+       memcpy(iobuf + info->scount[ch], input, cbyte);
        if((info->scount[ch] + count) >= REV_EX2_FRAGSIZE) {
-               do_reverb_ex2_R2HC(info->ios[ch] + REV_EX2_FRAGSIZE, info->is[ch], info->sint); 
+               do_reverb_ex2_R2HC(iobuf, info->is[ch], info->ffti[ch], info->fftw[ch]);        
                memset(info->ss[ch], 0, ribyte);        
-               for(i = 1; i < info->fnum; i++){
-                       float *bd0 = do_reverb_ex2_delay(info->is[ch], i - 1, info->bd[ch], &info->bdcount[ch], info->fnum);
-                       do_reverb_ex2_mul(bd0, info->fi[ch] + REV_EX2_FFTSIZE * i, info->ss[ch]);
+               for(i = 0; i < info->fnum; i++){
+                       float *bd = do_reverb_ex2_delay(info->is[ch], i, info->bd[ch], &info->bdcount[ch], info->fnum);                 
+                       do_reverb_ex2_mul(bd, info->fi[ch] + REV_EX2_FFTSIZE * i, info->ss[ch]);
                }
-               do_reverb_ex2_HC2R(info->ss[ch], info->rvs[ch], info->sint);
-               memcpy(info->ios[ch] + REV_EX2_FRAGSIZE, info->rvs[ch], fbyte);
+               do_reverb_ex2_HC2R(info->ss[ch], info->rvs[ch], info->ffti[ch], info->fftw[ch]);
+               memcpy(iobuf, info->rvs[ch], fbyte);
                memcpy(info->rvs[ch], info->rvs[ch] + REV_EX2_FRAGSIZE, fbyte);
                memset(info->rvs[ch] + REV_EX2_FRAGSIZE - 1, 0, sizeof(float) * (REV_EX2_FRAGSIZE + 1));
        }
        memcpy(output, info->ios[ch] + info->scount[ch], cbyte);
        info->scount[ch] += count;
        if(info->scount[ch] >= REV_EX2_FRAGSIZE) {
-               memcpy(info->ios[ch], info->ios[ch] + REV_EX2_FRAGSIZE, f2byte);
+               memcpy(info->ios[ch], iobuf, f2byte);
+               memset(iobuf + REV_EX2_FRAGSIZE, 0, fbyte);
                info->scount[ch] -= REV_EX2_FRAGSIZE;
        }
        return;
@@ -8819,9 +8822,6 @@ static void do_reverb_ex2_fft(DATA_T *buf, int32 count, InfoReverbEX2 *info)
        do_reverb_ex2_post_process(buf, count, info);
 }
 
-#endif
-
-
 
 
 /*                      */
index 41fe9da..38ab00e 100644 (file)
@@ -702,7 +702,6 @@ typedef struct _InfoReverbEX{
 } InfoReverbEX;
 
 
-//#define REV_EX2_FFT
 typedef struct _InfoReverbEX2{
        int8 mode;
        int32 revtype;
@@ -713,11 +712,10 @@ typedef struct _InfoReverbEX2{
        float *irdata[2], *buf[2], *tbuf[2]; // buf:delay(in)*2 , tbuf:out*2
        FLOAT_T rsfb[2];
        DATA_T *ptr;
-#if defined(REV_EX2_FFT)       // fft
        int32 fnum, scount[2], bdcount[2];
        float *fs[2], *ss[2], *rvs[2], *rs[2], *is[2], *os[2], *fi[2], *bd[2], *ios[2];
-       float *sint;
-#endif
+       float *fftw[2];
+       int *ffti[2];
 } InfoReverbEX2;
 
 
index c18753a..18b0825 100644 (file)
@@ -4823,7 +4823,7 @@ static inline void compute_op_null(Info_OP *info){}
 
 static inline void compute_op_wave_none(Info_OP *info)
 {
-       FLOAT_T osc, lfo1, lfo2;
+       FLOAT_T osc;
        
        info->in = 0.0; // clear
        info->rate += info->freq; // +1/sr = 1Hz
@@ -4835,7 +4835,7 @@ static inline void compute_op_wave_none(Info_OP *info)
 
 static inline void compute_op_wave_fm(Info_OP *info)
 {
-       FLOAT_T osc, rt;
+       FLOAT_T osc;
        FLOAT_T in = info->in;
        
        info->in = 0.0; // clear
@@ -4922,7 +4922,7 @@ static inline void compute_op_wave_lowbit(Info_OP *info)
 
 static inline void compute_op_scc_none(Info_OP *info)
 {
-       FLOAT_T osc, lfo1, lfo2;
+       FLOAT_T osc;
        
        info->in = 0.0; // clear
        info->rate += info->freq; // +1/sr = 1Hz
@@ -4983,10 +4983,7 @@ static inline void compute_op_scc_ampm(Info_OP *info)
        info->rate += info->freq; // +1/sr = 1Hz
        RESET_OP_RATE
        rt = info->rate + in * info->mod_level; // mod level;
-       if(rt >= 1.0)
-               rt -= floor(rt);
-       else if(rt < 0.0)
-               rt += floor(rt);
+       rt -= floor(rt);
        osc = info->scc_ptr(calc_op_width(info, rt), info->data_ptr);
        osc *= (1.0 - ((FLOAT_T)in * DIV_2 + 0.5) * info->mod_level);
        op_filter(&info->fc, &osc);
@@ -5039,7 +5036,7 @@ static inline void compute_op_pcm_fm(Info_OP *info)
        FLOAT_T in = info->in; // 
        
        info->in = 0.0; // clear
-       info->rate += info->freq * (1.0 + (FLOAT_T)in * info->mod_level); // +1/sr*pcm_rate/root_freq = 1Hz
+       info->pcm_rate = info->rate += info->freq * (1.0 + (FLOAT_T)in * info->mod_level); // +1/sr*pcm_rate/root_freq = 1Hz
        osc = compute_pcm_linear(info);
        op_filter(&info->fc, &osc);
        compute_op_output(info, osc * info->amp_vol); // include info->op_level
index 0748e60..9729407 100644 (file)
        ext_reverb_ex_mod = MyIni_GetInt32(sec, "Ext_EX_Mod", 0);
        // reverb ex2   
        ext_reverb_ex2_level = MyIni_GetFloat32(sec, "Ext_SR_Level", 1.0);
-       ext_reverb_ex2_rsmode = MyIni_GetInt32(sec, "Ext_SR_RS_Mode", 3);
-       ext_reverb_ex2_fftmode = MyIni_GetInt32(sec, "Ext_SR_FFT_Mode", 0);
+       ext_reverb_ex2_rsmode = MyIni_GetInt32(sec, "Ext_SR_RS_Mode", 0);
+       ext_reverb_ex2_fftmode = MyIni_GetInt32(sec, "Ext_SR_FFT_Mode", 1);
        // plate reverb
        ext_plate_reverb_level = MyIni_GetFloat32(sec, "Ext_Plate_Level", 1.0);
        ext_plate_reverb_level = MyIniParamRange(ext_plate_reverb_level, 0.001, 8.0);
index 00eedf1..1a1a0ef 100644 (file)
@@ -5083,6 +5083,7 @@ static int read_smf_track(struct timidity_file *tf, int trackno, int rewindp)
     int i;
     int32 smf_at_time;
     int note_seen = (! opt_preserve_silence);
+    int hascc111;
 
     smf_at_time = readmidi_set_track(trackno, rewindp);
 
@@ -5103,6 +5104,7 @@ static int read_smf_track(struct timidity_file *tf, int trackno, int rewindp)
     }
 
     lastchan = laststatus = 0;
+    hascc111 = 0;
 
     for(;;)
     {
@@ -5279,6 +5281,8 @@ static int read_smf_track(struct timidity_file *tf, int trackno, int rewindp)
                    break;
 
                  case 0x2F: /* End of Track */
+            if (hascc111 != 0)
+                MIDIEVENT(smf_at_time, ME_NONE, 0, 0, 0);
                    pos = tf_tell(tf);
                    if(pos < next_pos)
                        tf_seek(tf, next_pos - pos, SEEK_CUR);
@@ -5408,6 +5412,12 @@ static int read_smf_track(struct timidity_file *tf, int trackno, int rewindp)
              case 3: /* Control change */
                b = tf_getc(tf);
                readmidi_add_ctl_event(smf_at_time, lastchan, a, b);
+        if (a == 111) {
+            if (hascc111 == 0)
+                ctl->cmsg(CMSG_INFO, VERB_DEBUG,
+                            "Detection loop start event CC#111");
+            hascc111 = 1;
+        }
                break;
 
              case 4: /* Program change */
index 9adeaa4..3f1200f 100644 (file)
@@ -2277,13 +2277,15 @@ MAIN_INTERFACE int read_config_file(const char *name, int self, int allow_missin
     char *basedir = NULL, *sep = NULL;
     char *onmemory = NULL;
 
+       if(rcf_count == 0){
 #ifdef VOICE_EFFECT
-       cfg_flg_vfx = 0;
+               cfg_flg_vfx = 0;
 #endif
 #ifdef INT_SYNTH
-       cfg_flg_int_synth_mms = 0;
-       cfg_flg_int_synth_scc = 0;
+               cfg_flg_int_synth_mms = 0;
+               cfg_flg_int_synth_scc = 0;
 #endif
+       }
 
     if (rcf_count > 50)
     {
index 5d457b9..660ccce 100644 (file)
@@ -2821,6 +2821,8 @@ void init_voice_effect(int v)
                VoiceEffect *vfx = voice[v].vfx[i];
                int num = voice[v].sample->vfx[i][0]; // [0] = effect type
                        
+               if(!vfx)
+                       break;
                if(num <= VFX_NONE || num >= VFX_LIST_MAX)
                        break;
                memcpy(vfx->param, voice[v].sample->vfx[i], sizeof(int) * VOICE_EFFECT_PARAM_NUM);
index 0aba753..f5774d9 100644 (file)
@@ -285,6 +285,7 @@ Appendix :
     w[] and ip[] are compatible with all routines.
 */
 
+#include "optcode.h"
 
 void cdft(int n, int isgn, float *a, int *ip, float *w)
 {
@@ -358,6 +359,52 @@ void rdft(int n, int isgn, float *a, int *ip, float *w)
 }
 
 
+/*
+ * rdft_simd: real-data transform driver that routes the complex
+ * sub-transform through cftfsub_simd()/cftbsub_simd() when n > 4.
+ *
+ *   n    : transform length; a[0..n-1] is processed in place
+ *   isgn : >= 0 selects the first (forward) branch, < 0 the second (inverse)
+ *   a    : input/output data
+ *   ip   : integer work area; ip[0] and ip[1] hold the sizes of the tables
+ *          currently stored in w[], ip + 2 is handed to bitrv2()
+ *   w    : float table area; the first nw entries go to the complex
+ *          sub-transforms, w + nw goes to rftfsub()/rftbsub()
+ *
+ * The tables are regenerated through makewt()/makect() whenever n exceeds
+ * four times the size recorded in ip[0]/ip[1].
+ */
+void rdft_simd(int n, int isgn, float *a, int *ip, float *w)
+{
+    void makewt(int nw, int *ip, float *w);
+    void makect(int nc, int *ip, float *c);
+    void bitrv2(int n, int *ip, float *a);
+    /* The n == 4 case below uses the plain (non-SIMD) cftfsub(); it needs
+       its own prototype here because this file declares helpers per
+       function and cftfsub() is defined further down. */
+    void cftfsub(int n, float *a, float *w);
+    void cftfsub_simd(int n, float *a, float *w);
+    void cftbsub_simd(int n, float *a, float *w);
+    void rftfsub(int n, float *a, int nc, float *c);
+    void rftbsub(int n, float *a, int nc, float *c);
+    int nw, nc;
+    float xi;
+    
+    /* (Re)build the tables if the cached ones are too small for n. */
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 2)) {
+        nc = n >> 2;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub_simd(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        /* Fold a[0]/a[1] into their sum and difference. */
+        xi = a[0] - a[1];
+        a[0] += a[1];
+        a[1] = xi;
+    } else {
+        /* Undo the sum/difference packing of a[0]/a[1] first. */
+        a[1] = 0.5 * (a[0] - a[1]);
+        a[0] -= a[1];
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub_simd(n, a, w);
+        } else if (n == 4) {
+            /* n == 4 goes through cftfsub() in this branch as well. */
+            cftfsub(n, a, w);
+        }
+    }
+}
+
 void ddct(int n, int isgn, float *a, int *ip, float *w)
 {
     void makewt(int nw, int *ip, float *w);
@@ -976,6 +1023,84 @@ void cftfsub(int n, float *a, float *w)
     }
 }
 
+/*
+ * cftfsub_simd: forward complex sub-transform whose final radix-4 pass can
+ * run on SSE.
+ *
+ *   n : number of floats processed; a[0..n-1] holds interleaved re/im pairs
+ *   a : data, transformed in place
+ *   w : table forwarded to cft1st()/cftmdl()
+ *
+ * For n > 8 the leading stages are delegated to cft1st() and cftmdl().
+ * The last stage is a radix-4 butterfly when (l << 2) == n and a radix-2
+ * butterfly otherwise.
+ *
+ * The SSE loop consumes two complex values (four floats) per quarter-block
+ * and iteration, so it is only entered when l >= 4.  For n == 8 (l == 2)
+ * each quarter-block is a single complex value: a four-float access would
+ * overlap the neighbouring block, be misaligned, and touch a[8..9], so
+ * that size always takes the scalar loop.
+ * NOTE(review): the SSE path uses aligned loads/stores and therefore
+ * assumes a[] is 16-byte aligned -- confirm at the call sites.
+ */
+void cftfsub_simd(int n, float *a, float *w)
+{
+    void cft1st(int n, float *a, float *w);
+    void cftmdl(int n, int l, float *a, float *w);
+    int j, j1, j2, j3, l;
+    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+    
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        j = 0;
+#if (USE_X86_EXT_INTRIN >= 2)
+        if (l >= 4) {
+            /* _mm_set_ps lists lanes high-to-low, so in memory order:
+               vma1 = {-1, +1, -1, +1}, vam1 = {+1, -1, +1, -1}. */
+            const __m128 vma1 = _mm_set_ps(1, -1, 1, -1);
+            const __m128 vam1 = _mm_set_ps(-1, 1, -1, 1);
+            for (; j < l; j += 4) {
+                __m128 vj0, vj1, vj2, vj3, vx0, vx1, vx2, vx3;
+                j1 = j + l;
+                j2 = j1 + l;
+                j3 = j2 + l;
+                vj0 = _mm_load_ps(&a[j]);
+                vj1 = _mm_load_ps(&a[j1]);
+                vj2 = _mm_load_ps(&a[j2]);
+                vj3 = _mm_load_ps(&a[j3]);
+                vx0 = _mm_add_ps(vj0, vj1);
+                vx1 = _mm_sub_ps(vj0, vj1);
+                vx2 = _mm_add_ps(vj2, vj3);
+                vx3 = _mm_sub_ps(vj2, vj3);
+                vj0 = _mm_add_ps(vx0, vx2);
+                vj2 = _mm_sub_ps(vx0, vx2);
+                /* 0xB1 swaps re/im inside each complex pair of x3. */
+                vx3 = _mm_shuffle_ps(vx3, vx3, 0xB1);
+                /* MM_FMA_PS comes from optcode.h; presumably a * b + c. */
+                vj1 = MM_FMA_PS(vx3, vma1, vx1);
+                vj3 = MM_FMA_PS(vx3, vam1, vx1);
+                _mm_store_ps(&a[j], vj0);
+                _mm_store_ps(&a[j1], vj1);
+                _mm_store_ps(&a[j2], vj2);
+                _mm_store_ps(&a[j3], vj3);
+            }
+        }
+#endif
+        /* Scalar radix-4 pass: the whole job without SSE or when l == 2;
+           a no-op after the SSE loop, which always ends with j == l. */
+        for (; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i - x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i + x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i - x3r;
+        }
+    } else {
+        /* Final radix-2 pass. */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = a[j + 1] - a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] += a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
 
 void cftbsub(int n, float *a, float *w)
 {
@@ -1028,6 +1153,85 @@ void cftbsub(int n, float *a, float *w)
     }
 }
 
+/*
+ * cftbsub_simd: backward complex sub-transform whose final radix-4 pass can
+ * run on SSE.  Compared with the forward version, the final pass negates
+ * the imaginary parts of the first two quarter-blocks and flips the signs
+ * of the imaginary cross terms.
+ *
+ *   n : number of floats processed; a[0..n-1] holds interleaved re/im pairs
+ *   a : data, transformed in place
+ *   w : table forwarded to cft1st()/cftmdl()
+ *
+ * For n > 8 the leading stages are delegated to cft1st() and cftmdl().
+ * The last stage is a radix-4 butterfly when (l << 2) == n and a radix-2
+ * butterfly otherwise.
+ *
+ * The SSE loop consumes two complex values (four floats) per quarter-block
+ * and iteration, so it is only entered when l >= 4.  For n == 8 (l == 2)
+ * each quarter-block is a single complex value: a four-float access would
+ * overlap the neighbouring block, be misaligned, and touch a[8..9], so
+ * that size always takes the scalar loop.
+ * NOTE(review): the SSE path uses aligned loads/stores and therefore
+ * assumes a[] is 16-byte aligned -- confirm at the call sites.
+ */
+void cftbsub_simd(int n, float *a, float *w)
+{
+    void cft1st(int n, float *a, float *w);
+    void cftmdl(int n, int l, float *a, float *w);
+    int j, j1, j2, j3, l;
+    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+    
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        j = 0;
+#if (USE_X86_EXT_INTRIN >= 2)
+        if (l >= 4) {
+            /* _mm_set_ps lists lanes high-to-low, so in memory order:
+               vma1 = {-1, +1, -1, +1}, vam1 = {+1, -1, +1, -1}. */
+            const __m128 vma1 = _mm_set_ps(1, -1, 1, -1);
+            const __m128 vam1 = _mm_set_ps(-1, 1, -1, 1);
+            for (; j < l; j += 4) {
+                __m128 vj0, vj1, vj2, vj3, vx0, vx1, vx2, vx3;
+                j1 = j + l;
+                j2 = j1 + l;
+                j3 = j2 + l;
+                vj0 = _mm_load_ps(&a[j]);
+                vj1 = _mm_load_ps(&a[j1]);
+                vj2 = _mm_load_ps(&a[j2]);
+                vj3 = _mm_load_ps(&a[j3]);
+                /* Negate the imaginary lanes of a[j]; the multiply-adds
+                   below (MM_FMA_PS from optcode.h, presumably a * b + c)
+                   then yield x0 = { re + re1, -im - im1 } and
+                   x1 = { re - re1, -im + im1 }. */
+                vj0 = _mm_mul_ps(vj0, vam1);
+                vx0 = MM_FMA_PS(vj1, vam1, vj0);
+                vx1 = MM_FMA_PS(vj1, vma1, vj0);
+                vx2 = _mm_add_ps(vj2, vj3);
+                vx3 = _mm_sub_ps(vj2, vj3);
+                vj0 = MM_FMA_PS(vx2, vam1, vx0);
+                vj2 = MM_FMA_PS(vx2, vma1, vx0);
+                /* 0xB1 swaps re/im inside each complex pair of x3. */
+                vx3 = _mm_shuffle_ps(vx3, vx3, 0xB1);
+                vj1 = _mm_sub_ps(vx1, vx3);
+                vj3 = _mm_add_ps(vx1, vx3);
+                _mm_store_ps(&a[j], vj0);
+                _mm_store_ps(&a[j1], vj1);
+                _mm_store_ps(&a[j2], vj2);
+                _mm_store_ps(&a[j3], vj3);
+            }
+        }
+#endif
+        /* Scalar radix-4 pass: the whole job without SSE or when l == 2;
+           a no-op after the SSE loop, which always ends with j == l. */
+        for (; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = -a[j + 1] - a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = -a[j + 1] + a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i - x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i + x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i - x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i + x3r;
+        }
+    } else {
+        /* Final radix-2 pass with the imaginary parts negated. */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = -a[j + 1] + a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] = -a[j + 1] - a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
 
 void cft1st(int n, float *a, float *w)
 {
@@ -1285,7 +1489,6 @@ void rftfsub(int n, float *a, int nc, float *c)
     }
 }
 
-
 void rftbsub(int n, float *a, int nc, float *c)
 {
     int j, k, kk, ks, m;
index 9a7566b..91cdf41 100644 (file)
@@ -7,6 +7,7 @@
 */
 extern void cdft(int, int, float *, int *, float *);
 extern void rdft(int, int, float *, int *, float *);
+extern void rdft_simd(int, int, float *, int *, float *);
 extern void ddct(int, int, float *, int *, float *);
 extern void ddst(int, int, float *, int *, float *);
 extern void dfct(int, float *, float *, int *, float *);