OSDN Git Service

Import UnkoTim219 unkotim219
author Starg <starg@users.osdn.me>
Sat, 25 Aug 2018 08:48:10 +0000 (17:48 +0900)
committer Starg <starg@users.osdn.me>
Sat, 25 Aug 2018 08:48:10 +0000 (17:48 +0900)
36 files changed:
config.h
interface/resource.h
interface/w32g_c.c
interface/w32g_i.c
interface/w32g_new_console.cpp [new file with mode: 0644]
interface/w32g_new_console.h [new file with mode: 0644]
interface/w32g_playlist.c
interface/w32g_pref.c
interface/w32g_res.h
interface/w32g_res.rc
interface/w32g_subwin.c
kbtimsetup/kbtimsetup.vcproj
kbtimsetup/kbtimsetup.vcxproj
sf_view_gui/sfview_main.cpp
timdrvsetup/timdrvsetup.vcproj
timdrvsetup/timdrvsetup.vcxproj
timidity/decode.c
timidity/decode.h
timidity/effect.c
timidity/effect.h
timidity/filter.c
timidity/instrum.c
timidity/mix.c
timidity/optcode.h
timidity/playmidi.c
timidity/resample.c
timidity/sndfont.c
timidity/thread.c
timidity/thread.h
timidity/thread_effect.c
timidity/thread_mix.c
timidity/thread_playmidi.c
timidity/timidity.c
timidity/wasapi_a.c
timw32g/timw32g.vcxproj
twsyng/twsyng.vcxproj

index 2270572..a15996e 100644 (file)
--- a/config.h
+++ b/config.h
 #define AU_WDMKS 1
 #define AU_WASAPI 1
 #undef AU_VORBIS
-#undef AU_VORBIS_DLL
+//#undef AU_VORBIS_DLL
 #undef AU_GOGO
 #undef AU_GOGO_DLL
 #undef AU_LAME
@@ -726,7 +726,7 @@ typedef __int64 int64;
 #undef AU_WASAPI
 #undef AU_WDMKS
 #undef AU_VORBIS
-#undef AU_VORBIS_DLL
+//#undef AU_VORBIS_DLL
 #undef AU_GOGO
 #undef AU_GOGO_DLL
 #undef AU_LAME
@@ -813,4 +813,4 @@ typedef __int64 int64;
 #undef HAVE_LIBGOGOL
 #undef __W32G__        /* for Win32 GUI */
 #undef SUPPORT_SOUNDSPEC
-#endif
\ No newline at end of file
+#endif
index b654226..3680439 100644 (file)
 #define IDC_CHK_OWRITE_MODENV                   7003
 #define IDC_CHK_OWRITE_ENV                      7004
 #define IDC_CHK_OWRITE_VEL                      7005
-#define IDC_SFOW_VIBDELAY                       7050
-#define IDC_SFOW_VIBDEPTH                       7051
-#define IDC_SFOW_TRMDELAY                       7052
-#define IDC_SFOW_TRMDEPTH                       7053
-#define IDC_SFOW_TRMFC                          7054
-#define IDC_SFOW_TRMPITCH                       7055
-#define IDC_SFOW_VELFC                          7056
-#define IDC_SFOW_VELTHR                         7057
-#define IDC_SFOW_VELRES                         7058
-#define IDC_SFOW_MODENVFC                       7059
-#define IDC_SFOW_MODENVDELAY                    7060
-#define IDC_SFOW_MODENVPITCH                    7061
-#define IDC_SFOW_CUTOFF                         7062
-#define IDC_SFOW_RESONANCE                      7063
-#define IDC_SFOW_ENVDELAY                       7064
-#define IDC_SFATT_NEG                           7080
-#define IDC_SFATT_POW                           7081
-#define IDC_SFATT_MUL                           7082
-#define IDC_SFATT_ADD                           7083
+#define IDC_SFOW_VIBDELAY                       7020
+#define IDC_SFOW_VIBDEPTH                       7021
+#define IDC_SFOW_TRMDELAY                       7022
+#define IDC_SFOW_TRMDEPTH                       7023
+#define IDC_SFOW_TRMFC                          7024
+#define IDC_SFOW_TRMPITCH                       7025
+#define IDC_SFOW_VELFC                          7026
+#define IDC_SFOW_VELTHR                         7027
+#define IDC_SFOW_VELRES                         7028
+#define IDC_SFOW_MODENVFC                       7029
+#define IDC_SFOW_MODENVDELAY                    7030
+#define IDC_SFOW_MODENVPITCH                    7031
+#define IDC_SFOW_CUTOFF                         7032
+#define IDC_SFOW_RESONANCE                      7033
+#define IDC_SFOW_ENVDELAY                       7034
+#define IDC_SFATT_NEG                           7050
+#define IDC_SFATT_POW                           7051
+#define IDC_SFATT_MUL                           7052
+#define IDC_SFATT_ADD                           7053
+#define IDC_SFL_VOLENV_ATK                      7060
+#define IDC_SFL_MODENV_ATK                      7061
+#define IDC_SFL_MODENV_FC                       7062
+#define IDC_SFL_MODENV_PIT                      7063
+#define IDC_SFL_MODLFO_FC                       7064
+#define IDC_SFL_MODLFO_PIT                      7065
+#define IDC_SFL_VIBLFO_PIT                      7066
+#define IDC_SFL_MODLFO_FREQ                     7067
+#define IDC_SFL_VIBLFO_FREQ                     7068
+#define IDC_SFD_MODLFO_FREQ                     7070
+#define IDC_SFD_VIBLFO_FREQ                     7071
+#define IDC_SFC_LFO_SWAP                        7080
+#define IDC_SFC_ADRS_OFFSET                     7081
 #define IDC_GSENV_ATTACK_CALC                   7100
 #define IDC_GSENV_DECAY_CALC                    7101
 #define IDC_GSENV_RELEASE_CALC                  7102
 #define IDC_RADIOBUTTON_WAVE_FORMAT_EXT         8002
 #define IDC_EDIT_WMME_BUFFER_BIT                8003
 #define IDC_EDIT_WMME_BUFFER_NUM                8004
+#define IDC_COMBO_WDMKS_DEV                     8020
+#define IDC_RADIOBUTTON_WDMKS_FORMAT_EX         8021
+#define IDC_RADIOBUTTON_WDMKS_FORMAT_EXT        8022
+#define IDC_RADIOBUTTON_WDMKS_EVENT             8023
+#define IDC_RADIOBUTTON_WDMKS_POLLING           8024
+#define IDC_COMBO_WDMKS_THREAD_PRIORITY         8025
+#define IDC_COMBO_WDMKS_RT_PRIORITY             8026
+#define IDC_COMBO_WDMKS_PIN_PRIORITY            8027
+#define IDC_EDIT_WDMKS_LATENCY                  8029
+#define IDC_STATIC_WDMKS_STREAM_TYPE            8030
+#define IDC_STATIC_WDMKS_LATENCY_MIN            8031
+#define IDC_STATIC_WDMKS_LATENCY_MAX            8032
+#define IDC_STATIC_WDMKS_RATE_MAX               8033
+#define IDC_STATIC_WDMKS_RATE_MIN               8034
+#define IDC_STATIC_WDMKS_BITS_MIN               8035
+#define IDC_STATIC_WDMKS_BITS_MAX               8036
+#define IDC_STATIC_WDMKS_FLOAT                  8037
+#define IDC_STATIC_WDMKS_DEVICE_ID              8038
 #define IDC_COMBO_WASAPI_DEV                    8050
 #define IDC_RADIOBUTTON_WASAPI_FORMAT_EX        8051
 #define IDC_RADIOBUTTON_WASAPI_FORMAT_EXT       8052
 #define IDC_RADIOBUTTON_WASAPI_EXCLUSIVE        8054
 #define IDC_RADIOBUTTON_WASAPI_EVENT            8055
 #define IDC_RADIOBUTTON_WASAPI_POLLING          8056
-#define IDC_COMBO_WASAPI_PRIORITY               8057
-#define IDC_COMBO_WASAPI_STREAM_CATEGORY        8058
-#define IDC_COMBO_WASAPI_STREAM_OPTION          8059
-#define IDC_EDIT_WASAPI_LATENCY                 8060
-#define IDC_STATIC_WASAPI_LATENCY_MIN           8061
-#define IDC_STATIC_WASAPI_LATENCY_MAX           8062
-#define IDC_COMBO_WDMKS_DEV                     8070
-#define IDC_RADIOBUTTON_WDMKS_FORMAT_EX         8071
-#define IDC_RADIOBUTTON_WDMKS_FORMAT_EXT        8072
-#define IDC_RADIOBUTTON_WDMKS_CYCLIC            8073
-#define IDC_RADIOBUTTON_WDMKS_RT                8074
-#define IDC_RADIOBUTTON_WDMKS_EVENT             8075
-#define IDC_RADIOBUTTON_WDMKS_POLLING           8076
-#define IDC_COMBO_WDMKS_PRIORITY                8077
-#define IDC_COMBO_WDMKS_PRIORITY_RT             8078
-#define IDC_EDIT_WDMKS_LATENCY                  8080
-#define IDC_STATIC_WDMKS_LATENCY_MIN            8081
-#define IDC_STATIC_WDMKS_LATENCY_MAX            8082
-#define IDC_STATIC_WDMKS_STREAM_TYPE            8084
+#define IDC_COMBO_WASAPI_PRIORITY               8058
+#define IDC_COMBO_WASAPI_STREAM_CATEGORY        8060
+#define IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_RAW  8061
+#define IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_MATCH_FORMAT 8062
+#define IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_AMBISONICS 8063
+#define IDC_EDIT_WASAPI_LATENCY                 8064
+#define IDC_STATIC_WASAPI_LATENCY_MIN           8065
+#define IDC_STATIC_WASAPI_LATENCY_MAX           8066
 #define IDC_COMBO_PA_ASIO_DEV                   8100
 #define IDC_BUTTON_PA_ASIO_CONFIG               8101
 #define IDC_COMBO_PA_WMME_DEV                   8102
index 8153e0c..47c7a38 100644 (file)
@@ -253,6 +253,10 @@ static void ctl_close(void)
        ctl.opened = 0;
        safe_free(Panel);
        Panel = NULL;
+       
+#ifdef TIMW32G_USE_NEW_CONSOLE
+       ClearNewConsoleBuffer();
+#endif
 
 //#ifdef FORCE_TIME_PERIOD
 //     timeEndPeriod(tcaps.wPeriodMin);
@@ -921,8 +925,12 @@ static int cmsg(int type, int verbosity_level, char *fmt, ...)
        ShowStartupMessage();
        w32g_msg_box(buffer, "TiMidity Error", MB_OK);
     }
+#ifdef TIMW32G_USE_NEW_CONSOLE
+       NewConsoleBufferWriteCMsg(type, verbosity_level, buffer);
+#else
     PutsConsoleWnd(buffer);
     PutsConsoleWnd("\n");
+#endif
     return 0;
 }
 
index 09f76d3..1aa189b 100644 (file)
@@ -89,7 +89,7 @@ WINAPI void InitCommonControls(void);
 
 static void InitMainWnd(HWND hStartWnd);
 
-static void ConsoleWndVerbosityApplyIncDec(int num);
+static void ConsoleWndVerbosityApplySet(int num);
 void ConsoleWndVerbosityApply(void);
 
 void CanvasPaintAll(void);
@@ -1755,8 +1755,8 @@ void WINAPI DebugThread(void *args)
 {
        MSG msg;
        DebugThreadExit = 0;
-       InitDebugWnd(NULL);
-//     ShowWindow(hDebugWnd,SW_SHOW);
+//     InitDebugWnd(NULL);
+       ShowWindow(hDebugWnd,SW_SHOW);
        AttachThreadInput(GetWindowThreadProcessId(hDebugThread,NULL),
        GetWindowThreadProcessId(hWindowThread,NULL),TRUE);
        AttachThreadInput(GetWindowThreadProcessId(hWindowThread,NULL),
@@ -4691,7 +4691,8 @@ void TiMidityVariablesCheck(void)
 #endif
 }
 
-
+extern int32 test_var[10];
+int32 test_var[10] = {0};
 
 
 // ****************************************************************************
@@ -4750,6 +4751,24 @@ DebugWndProc(HWND hwnd, UINT uMess, WPARAM wParam, LPARAM lParam)
       case IDC_BUTTON_VARIABLES_CHECK:
                        TiMidityVariablesCheck();
                break;
+       case IDC_EDIT_VAR0:
+       case IDC_EDIT_VAR1:
+       case IDC_EDIT_VAR2:
+       case IDC_EDIT_VAR3:
+       case IDC_EDIT_VAR4:
+       case IDC_EDIT_VAR5:
+       case IDC_EDIT_VAR6:
+       case IDC_EDIT_VAR7:
+       case IDC_EDIT_VAR8:
+       case IDC_EDIT_VAR9:
+       break;
+       case IDC_BUTTON_VAR_ENTER:
+       {
+               int i;          
+               for(i=0; i<10;i++)
+                       test_var[i] = GetDlgItemInt(hwnd, IDC_EDIT_VAR0 + i, NULL, TRUE);
+       }
+               break;
       default:
                        break;
       }
@@ -5060,6 +5079,7 @@ static void DlgDirOpen(HWND hwnd)
        itemidlist = SHBrowseForFolder(&bi);
        if(!itemidlist)
                return; /* Cancel */
+       memset(Buffer, 0, sizeof(Buffer));
        SHGetPathFromIDList(itemidlist, Buffer);
        strncpy(biBuffer, Buffer, sizeof(Buffer) - 1);
        if(itemidlist_pre)
diff --git a/interface/w32g_new_console.cpp b/interface/w32g_new_console.cpp
new file mode 100644 (file)
index 0000000..adadbaa
--- /dev/null
@@ -0,0 +1,1248 @@
+// TiMidity++ Win32 GUI New Console
+// Copyright (c) 2018 Starg <https://osdn.net/projects/timidity41>
+
+
+extern "C"
+{
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "timidity.h"
+#include "common.h"
+#include "controls.h"
+#include "instrum.h"
+#include "playmidi.h"
+
+#include "w32g.h"
+#include "w32g_res.h"
+
+#include "w32g_new_console.h"
+}
+
+#include <windows.h>
+#include <windowsx.h>
+
+#include <cstddef>
+#include <cstdarg>
+#include <cstdio>
+
+#include <algorithm>
+#include <array>
+#include <numeric>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include <tchar.h>
+
+#ifdef min
+#undef min
+#endif
+
+#ifdef max
+#undef max
+#endif
+
+namespace TimW32gNewConsole
+{
+
+// NOTE(review): presumably the name the console window class is registered under;
+// the registration code is outside this view -- confirm.
+LPCTSTR pClassName = _T("TimW32gNewConsole");
+
+// Campbell color theme
+// https://github.com/Microsoft/console/blob/master/tools/ColorTool/schemes/campbell.ini
+// NormalColor is the color NewConsoleWindow::Write(LPCTSTR) and WriteV use for their text.
+const COLORREF BackgroundColor = RGB(12, 12, 12);
+const COLORREF NormalColor = RGB(204, 204, 204);
+const COLORREF ErrorColor = RGB(231, 72, 86);
+const COLORREF WarningColor = RGB(249, 241, 165);
+const COLORREF InfoColor = RGB(58, 150, 221);
+
+// String types that follow the TCHAR setting of the build (char or wchar_t).
+using TString = std::basic_string<TCHAR>;
+using TStringView = std::basic_string_view<TCHAR>;
+
+bool CopyTextToClipboard(TStringView text)
+{
+    bool ret = false;
+
+    if (::OpenClipboard(nullptr))
+    {
+        HGLOBAL hGlobal = ::GlobalAlloc(GMEM_MOVEABLE | GMEM_SHARE, (text.size() + 1) * sizeof(TCHAR));
+
+        if (hGlobal)
+        {
+            auto p = reinterpret_cast<LPTSTR>(::GlobalLock(hGlobal));
+            text.copy(p, text.size());
+            p[text.size()] = _T('\0');
+            ::GlobalUnlock(hGlobal);
+
+            ::EmptyClipboard();
+
+#ifdef UNICODE
+            UINT format = CF_UNICODETEXT;
+#else
+            UINT format = CF_TEXT;
+#endif
+
+            if (::SetClipboardData(format, hGlobal))
+            {
+                ret = true;
+            }
+            else
+            {
+                ::GlobalFree(hGlobal);
+            }
+        }
+
+        ::CloseClipboard();
+    }
+
+    return ret;
+}
+
+template<typename T>
+class UniqueLock
+{
+public:
+    UniqueLock() : m_pLock(nullptr)
+    {
+    }
+
+    explicit UniqueLock(T& lock) : m_pLock(&lock)
+    {
+        m_pLock->DoLockUnique();
+    }
+
+    UniqueLock(const UniqueLock&) = delete;
+    UniqueLock& operator=(const UniqueLock&) = delete;
+
+    UniqueLock(UniqueLock&& rhs) noexcept : m_pLock()
+    {
+        swap(rhs);
+    }
+
+    UniqueLock& operator=(UniqueLock&& rhs) noexcept
+    {
+        UniqueLock(std::move(rhs)).swap(*this);
+        return *this;
+    }
+
+    ~UniqueLock()
+    {
+        Unlock();
+    }
+
+    void swap(UniqueLock& rhs) noexcept
+    {
+        using std::swap;
+        swap(m_pLock, rhs.m_pLock);
+    }
+
+    void Unlock()
+    {
+        if (m_pLock)
+        {
+            m_pLock->DoUnlockUnique();
+            m_pLock = nullptr;
+        }
+    }
+
+private:
+    T* m_pLock;
+};
+
+template<typename T>
+class SharedLock
+{
+public:
+    SharedLock() : m_pLock(nullptr)
+    {
+    }
+
+    explicit SharedLock(T& lock) : m_pLock(&lock)
+    {
+        m_pLock->DoLockShared();
+    }
+
+    SharedLock(const SharedLock&) = delete;
+    SharedLock& operator=(const SharedLock&) = delete;
+
+    SharedLock(SharedLock&& rhs) noexcept : m_pLock()
+    {
+        swap(rhs);
+    }
+
+    SharedLock& operator=(SharedLock&& rhs) noexcept
+    {
+        SharedLock(std::move(rhs)).swap(*this);
+        return *this;
+    }
+
+    ~SharedLock()
+    {
+        Unlock();
+    }
+
+    void swap(SharedLock& rhs) noexcept
+    {
+        using std::swap;
+        swap(m_pLock, rhs.m_pLock);
+    }
+
+    void Unlock()
+    {
+        if (m_pLock)
+        {
+            m_pLock->DoUnlockShared();
+            m_pLock = nullptr;
+        }
+    }
+
+private:
+    T* m_pLock;
+};
+
+class SRWLock
+{
+    friend class UniqueLock<SRWLock>;
+    friend class SharedLock<SRWLock>;
+
+public:
+    SRWLock()
+    {
+        ::InitializeSRWLock(&m_Lock);
+    }
+
+    SRWLock(const SRWLock&) = delete;
+    SRWLock& operator=(const SRWLock&) = delete;
+    SRWLock(SRWLock&&) = delete;
+    SRWLock& operator=(SRWLock&&) = delete;
+
+    ~SRWLock() = default;
+
+    UniqueLock<SRWLock> LockUnique()
+    {
+        return UniqueLock<SRWLock>(*this);
+    }
+
+    SharedLock<SRWLock> LockShared()
+    {
+        return SharedLock<SRWLock>(*this);
+    }
+
+    SRWLOCK* Get()
+    {
+        return &m_Lock;
+    }
+
+private:
+    void DoLockUnique()
+    {
+        ::AcquireSRWLockExclusive(&m_Lock);
+    }
+
+    void DoUnlockUnique()
+    {
+        ::ReleaseSRWLockExclusive(&m_Lock);
+    }
+
+    void DoLockShared()
+    {
+        ::AcquireSRWLockShared(&m_Lock);
+    }
+
+    void DoUnlockShared()
+    {
+        ::ReleaseSRWLockShared(&m_Lock);
+    }
+
+    SRWLOCK m_Lock;
+};
+
+// Position of a character inside a StyledTextBuffer.
+// StyledTextBuffer::CopySubstring treats the Column of an end location as inclusive.
+struct TextLocationInfo
+{
+    std::size_t Line;      // line index, as accepted by StyledTextBuffer::GetLineString
+    std::size_t Column;    // character index within that line
+};
+
+// A run of characters that is drawn in a single color.
+struct StyledLineFragment
+{
+    std::size_t Offset;        // offset of the first character in the buffer's text string
+    std::size_t Length;        // number of characters in the run
+    COLORREF Color;            // text color used when the run is drawn
+};
+
+// One line of text, described as a range of consecutive fragments.
+struct StyledLine
+{
+    std::size_t Offset;        // index of the line's first entry in std::vector<StyledLineFragment>
+    std::size_t Length;        // number of fragments in the line (0 for an empty line)
+};
+
+class StyledTextBuffer
+{
+public:
+    StyledTextBuffer()
+    {
+    }
+
+    void Clear()
+    {
+        m_Fragments.clear();
+        m_Lines.clear();
+        m_String.clear();
+        m_MaxColumnLength = 0;
+    }
+
+    void Append(COLORREF color, LPCTSTR pText)
+    {
+        Append(color, TStringView(pText));
+    }
+
+    void Append(COLORREF color, TStringView text)
+    {
+        std::size_t offset = 0;
+
+        while (offset < text.size())
+        {
+            // split input into lines
+            std::size_t nlOffset = text.find_first_of(_T("\r\n"), offset);
+
+            if (nlOffset == text.npos)
+            {
+                AppendNoNewline(color, text.substr(offset));
+                break;
+            }
+            else
+            {
+                if (offset < nlOffset)
+                {
+                    AppendNoNewline(color, text.substr(offset, offset - nlOffset));
+                }
+
+                AppendNewline();
+
+                if (text[nlOffset] == _T('\r') && nlOffset + 1 < text.size() && text[nlOffset + 1] == _T('\n'))
+                {
+                    offset = nlOffset + 2;
+                }
+                else
+                {
+                    offset = nlOffset + 1;
+                }
+            }
+        }
+    }
+
+    void AppendNewline()
+    {
+        m_Lines.push_back({m_Fragments.size(), 0});
+    }
+
+    std::size_t GetLineCount() const
+    {
+        return m_Lines.size();
+    }
+
+    std::size_t GetMaxColumnLength() const
+    {
+        return m_MaxColumnLength;
+    }
+
+    std::size_t GetColumnLength(std::size_t line) const
+    {
+        return std::accumulate(
+            m_Fragments.begin() + m_Lines[line].Offset,
+            m_Fragments.begin() + m_Lines[line].Offset + m_Lines[line].Length,
+            0,
+            [] (auto&& a, auto&& b)
+            {
+                return a + b.Length;
+            }
+        );
+    }
+
+    TStringView GetString() const
+    {
+        return m_String;
+    }
+
+    const std::vector<StyledLine>& GetLines() const
+    {
+        return m_Lines;
+    }
+
+    const std::vector<StyledLineFragment>& GetFragments() const
+    {
+        return m_Fragments;
+    }
+
+    TStringView GetLineString(std::size_t line) const
+    {
+        const auto& lineInfo = m_Lines[line];
+
+        if (lineInfo.Length == 0)
+        {
+            return {};
+        }
+
+        std::size_t first = m_Fragments[lineInfo.Offset].Offset;
+        std::size_t last = lineInfo.Offset + lineInfo.Length == m_Fragments.size()
+            ? m_String.size()
+            : m_Fragments[lineInfo.Offset + lineInfo.Length].Offset;
+
+        return TStringView(m_String.data() + first, last - first);
+    }
+
+    TString CopySubstring(TextLocationInfo start, TextLocationInfo end) const
+    {
+        TString ret(GetLineString(start.Line).substr(start.Column, start.Line < end.Line ? TStringView::npos : end.Column + 1 - start.Column));
+
+        for (std::size_t line = start.Line + 1; line <= end.Line; line++)
+        {
+            ret.append(_T("\r\n"));
+            ret.append(GetLineString(line).substr(0, line < end.Line ? TStringView::npos : end.Column + 1));
+        }
+
+        return ret;
+    }
+
+private:
+    void AppendNoNewline(COLORREF color, TStringView text)
+    {
+        std::size_t stringOffset = m_String.size();
+        m_String.append(text);
+
+        std::size_t fragmentOffset = m_Fragments.size();
+        m_Fragments.push_back({stringOffset, text.size(), color});
+
+        if (m_Lines.empty())
+        {
+            m_Lines.push_back({fragmentOffset, 1});
+        }
+        else
+        {
+            m_Lines.back().Length++;
+        }
+
+        // update m_MaxColumnLength
+        m_MaxColumnLength = std::max(GetColumnLength(GetLineCount() - 1), m_MaxColumnLength);
+    }
+
+    TString m_String;
+    std::vector<StyledLine> m_Lines;
+    std::vector<StyledLineFragment> m_Fragments;
+    std::size_t m_MaxColumnLength = 0;    // max number of characters in line
+};
+
+// Text buffer that NewConsoleWindow::WindowProc hands to the window object it creates on WM_CREATE.
+StyledTextBuffer GlobalNewConsoleBuffer;
+
+class NewConsoleWindow
+{
+    // Identifiers passed to SetTimer/KillTimer for this window's timers.
+    enum TimerKind
+    {
+        RedrawTimer,       // started in OnCreate with a 200 ms period, stopped in OnDestroy
+        DragScrollTimer    // started in OnLButtonDown with a 100 ms period, stopped in OnLButtonUp
+    };
+
+public:
+    // Binds the window object to `buffer`. Only a reference is stored, so `buffer` has to
+    // outlive this object.
+    explicit NewConsoleWindow(StyledTextBuffer& buffer) : m_Buffer(buffer)
+    {
+    }
+
+    // Not copyable.
+    NewConsoleWindow(const NewConsoleWindow&) = delete;
+    NewConsoleWindow& operator=(const NewConsoleWindow&) = delete;
+    ~NewConsoleWindow() = default;
+
+    // Empties the text buffer while holding the lock exclusively.
+    void Clear()
+    {
+        auto exclusiveGuard = m_Lock.LockUnique();
+
+        m_Buffer.Clear();
+    }
+
+    // Appends `pText` in NormalColor; no line break is added after it.
+    void Write(LPCTSTR pText)
+    {
+        constexpr bool appendNewline = false;
+        Write(NormalColor, pText, appendNewline);
+    }
+
+    // Formats printf-style arguments and appends the result in NormalColor; no line break is
+    // added after it. Output longer than BUFSIZ - 1 characters is truncated.
+    void WriteV(LPCTSTR pFormat, va_list args)
+    {
+        // Zero-filled so the text is always terminated, even when formatting truncates or fails.
+        std::array<TCHAR, BUFSIZ> buf = {};
+
+        // _vsntprintf follows the TCHAR setting of the build (std::vsnprintf only takes char
+        // buffers). The last element is kept out of its reach so it stays a terminator.
+        _vsntprintf(buf.data(), buf.size() - 1, pFormat, args);
+
+        Write(NormalColor, buf.data(), false);
+    }
+
+    void Write(COLORREF color, LPCTSTR pText, bool newline)
+    {
+        auto lock = m_Lock.LockUnique();
+        bool shouldAutoScroll = ShouldAutoScroll();
+
+        m_Buffer.Append(color, pText);
+
+        if (newline)
+        {
+            m_Buffer.AppendNewline();
+        }
+
+        if (shouldAutoScroll)
+        {
+            DoAutoScroll();
+        }
+    }
+
+    static LRESULT CALLBACK WindowProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
+    {
+        auto pConsoleWindow = reinterpret_cast<NewConsoleWindow*>(::GetWindowLongPtr(hWnd, GWLP_USERDATA));
+
+        switch (msg)
+        {
+        case WM_CREATE:
+            pConsoleWindow = new NewConsoleWindow(GlobalNewConsoleBuffer);
+            ::SetWindowLongPtr(hWnd, GWLP_USERDATA, reinterpret_cast<LONG_PTR>(pConsoleWindow));
+            pConsoleWindow->m_hWnd = hWnd;
+            pConsoleWindow->OnCreate();
+            return 0;
+
+        case WM_DESTROY:
+            if (pConsoleWindow)
+            {
+                pConsoleWindow->OnDestroy();
+                pConsoleWindow->m_hWnd = nullptr;
+                ::SetWindowLongPtr(hWnd, GWLP_USERDATA, 0);
+                delete pConsoleWindow;
+            }
+            return 0;
+
+        default:
+            if (pConsoleWindow)
+            {
+                switch (msg)
+                {
+                case WM_SIZE:
+                    pConsoleWindow->OnSize();
+                    return 0;
+
+                case WM_PAINT:
+                    pConsoleWindow->OnPaint();
+                    return 0;
+
+                case WM_VSCROLL:
+                    pConsoleWindow->OnVScroll(wParam, lParam);
+                    return 0;
+
+                case WM_HSCROLL:
+                    pConsoleWindow->OnHScroll(wParam, lParam);
+                    return 0;
+
+                case WM_MOUSEWHEEL:
+                    pConsoleWindow->OnMouseWheel(wParam, lParam);
+                    return 0;
+
+                case WM_MOUSEHWHEEL:
+                    pConsoleWindow->OnMouseHWheel(wParam, lParam);
+                    return 0;
+
+                case WM_LBUTTONDOWN:
+                    pConsoleWindow->OnLButtonDown(wParam, lParam);
+                    return 0;
+
+                case WM_LBUTTONUP:
+                    pConsoleWindow->OnLButtonUp(wParam, lParam);
+                    return 0;
+
+                case WM_MOUSEMOVE:
+                    pConsoleWindow->OnMouseMove(wParam, lParam);
+                    return 0;
+
+                case WM_KEYDOWN:
+                    pConsoleWindow->OnKeyDown(wParam, lParam);
+                    return 0;
+
+                case WM_TIMER:
+                    pConsoleWindow->OnTimer(wParam, lParam);
+                    return 0;
+
+                default:
+                    break;
+                }
+            }
+            break;
+        }
+
+        return ::DefWindowProc(hWnd, msg, wParam, lParam);
+    }
+
+private:
+    // WM_CREATE handler: starts the periodic redraw timer, shows both scroll bars, calls
+    // InitializeGDIResource() and requests a full repaint.
+    void OnCreate()
+    {
+        constexpr UINT redrawIntervalMs = 200;
+        ::SetTimer(m_hWnd, RedrawTimer, redrawIntervalMs, nullptr);
+
+        ::ShowScrollBar(m_hWnd, SB_BOTH, TRUE);
+        InitializeGDIResource();
+        InvalidateRect(m_hWnd, nullptr, TRUE);
+    }
+
+    // WM_DESTROY handler: stops the redraw timer, then calls UninitializeGDIResource().
+    void OnDestroy()
+    {
+        ::KillTimer(m_hWnd, static_cast<UINT_PTR>(RedrawTimer));
+        UninitializeGDIResource();
+    }
+
+    // WM_SIZE handler: runs InitializeGDIResource() again and requests a full repaint.
+    void OnSize()
+    {
+        InitializeGDIResource();    // recreate everything
+        InvalidateRect(m_hWnd, nullptr, TRUE);
+    }
+
+    // WM_PAINT handler. Draws into the back-buffer DC (m_hBackDC) first -- background fill,
+    // visible text lines, inverted selection -- and then copies the result to the window DC.
+    void OnPaint()
+    {
+        PAINTSTRUCT ps;
+        HDC hDC = ::BeginPaint(m_hWnd, &ps);
+
+        // Clear the back buffer to the background brush over the whole client area.
+        RECT rc;
+        ::GetClientRect(m_hWnd, &rc);
+        ::FillRect(m_hBackDC, &rc, m_hBgBrush);
+
+        {
+            // The buffer is only read in this scope, so the lock is taken in shared mode.
+            auto lock = m_Lock.LockShared();
+            UpdateScrollBarsNoLock();
+
+            // Number of lines to draw: what is left below the top line, capped by the window height.
+            // NOTE(review): the subtraction is done in std::size_t; if m_CurrentTopLineNumber ever
+            // exceeds the line count, the result depends on the wrap-around and the int cast -- confirm.
+            int lineCount = std::min(static_cast<int>(m_Buffer.GetLineCount() - m_CurrentTopLineNumber), GetVisibleLinesInWindow());
+
+            for (int i = 0; i < lineCount; i++)
+            {
+                auto lineInfo = m_Buffer.GetLines()[m_CurrentTopLineNumber + i];
+                auto first = m_Buffer.GetFragments().begin() + lineInfo.Offset;
+                auto last = first + lineInfo.Length;
+
+                // Horizontal scrolling shifts the line left by whole character cells.
+                int x = -m_CurrentLeftColumnNumber * m_FontWidth;
+                int y = i * m_FontHeight;
+
+                // Draw the fragments left to right; x advances by Length cells of m_FontWidth each.
+                std::for_each(
+                    first,
+                    last,
+                    [str = m_Buffer.GetString(), hWnd = m_hWnd, hDC = m_hBackDC, &x, y, fontWidth = m_FontWidth] (const StyledLineFragment& lf)
+                    {
+                        ::SetTextColor(hDC, lf.Color);
+                        ::TextOut(hDC, x, y, str.data() + lf.Offset, lf.Length);
+                        x += lf.Length * fontWidth;
+                    }
+                );
+            }
+
+            // Show the selection by inverting the pixels it covers (DSTINVERT).
+            if (m_SelStart.has_value() && m_SelEnd.has_value())
+            {
+                auto [x, y] = PositionFromTextLocation(*m_SelStart);
+                auto [xe, ye] = PositionFromTextLocation(*m_SelEnd);
+
+                if (m_SelStart->Line == m_SelEnd->Line)
+                {
+                    // Single line: from the start cell through the end cell.
+                    ::BitBlt(m_hBackDC, x, y, xe - x + m_FontWidth, m_FontHeight, nullptr, 0, 0, DSTINVERT);
+                }
+                else
+                {
+                    // Several lines: rest of the first line, the full-width rows in between,
+                    // then the last line up to and including the end cell.
+                    ::BitBlt(m_hBackDC, x, y, (rc.right - rc.left) - x, m_FontHeight, nullptr, 0, 0, DSTINVERT);
+                    ::BitBlt(m_hBackDC, 0, y + m_FontHeight, rc.right - rc.left, ye - (y + m_FontHeight), nullptr, 0, 0, DSTINVERT);
+                    ::BitBlt(m_hBackDC, 0, ye, xe + m_FontWidth, m_FontHeight, nullptr, 0, 0, DSTINVERT);
+                }
+            }
+        }
+
+        // Copy the finished back buffer to the window.
+        ::BitBlt(hDC, 0, 0, rc.right - rc.left, rc.bottom - rc.top, m_hBackDC, 0, 0, SRCCOPY);
+        ::EndPaint(m_hWnd, &ps);
+    }
+
+    void OnVScroll(WPARAM wParam, LPARAM)
+    {
+        auto lock = m_Lock.LockUnique();
+
+        switch (LOWORD(wParam))
+        {
+        case SB_TOP:
+            m_CurrentTopLineNumber = 0;
+            break;
+
+        case SB_BOTTOM:
+            m_CurrentTopLineNumber = GetMaxTopLineNumber();
+            break;
+
+        case SB_LINEUP:
+            m_CurrentTopLineNumber = std::max(0, m_CurrentTopLineNumber - 1);
+            break;
+
+        case SB_LINEDOWN:
+            m_CurrentTopLineNumber = std::min(m_CurrentTopLineNumber + 1, GetMaxTopLineNumber());
+            break;
+
+        case SB_PAGEUP:
+            m_CurrentTopLineNumber = std::max(0, m_CurrentTopLineNumber - GetVisibleLinesInWindow());
+            break;
+
+        case SB_PAGEDOWN:
+            m_CurrentTopLineNumber = std::min(m_CurrentTopLineNumber + GetVisibleLinesInWindow(), GetMaxTopLineNumber());
+            break;
+
+        case SB_THUMBPOSITION:
+        case SB_THUMBTRACK:
+            {
+                SCROLLINFO si = {};
+                si.cbSize = sizeof(SCROLLINFO);
+                si.fMask = SIF_TRACKPOS;
+                ::GetScrollInfo(m_hWnd, SB_VERT, &si);
+                m_CurrentTopLineNumber = si.nTrackPos;
+            }
+            break;
+
+        default:
+            break;
+        }
+
+        InvalidateRect(m_hWnd, nullptr, true);
+    }
+
+    void OnHScroll(WPARAM wParam, LPARAM)
+    {
+        auto lock = m_Lock.LockUnique();
+
+        switch (LOWORD(wParam))
+        {
+        case SB_LEFT:
+            m_CurrentLeftColumnNumber = 0;
+            break;
+
+        case SB_RIGHT:
+            m_CurrentLeftColumnNumber = GetMaxLeftColumnNumber();
+            break;
+
+        case SB_LINELEFT:
+            m_CurrentLeftColumnNumber = std::max(0, m_CurrentLeftColumnNumber - 1);
+            break;
+
+        case SB_LINERIGHT:
+            m_CurrentLeftColumnNumber = std::min(m_CurrentLeftColumnNumber + 1, GetMaxLeftColumnNumber());
+            break;
+
+        case SB_PAGELEFT:
+            m_CurrentLeftColumnNumber = std::max(0, m_CurrentLeftColumnNumber - GetVisileColumnsInWindow());
+            break;
+
+        case SB_PAGERIGHT:
+            m_CurrentLeftColumnNumber = std::min(m_CurrentLeftColumnNumber + GetVisileColumnsInWindow(), GetMaxLeftColumnNumber());
+            break;
+
+        case SB_THUMBPOSITION:
+        case SB_THUMBTRACK:
+            {
+                SCROLLINFO si = {};
+                si.cbSize = sizeof(SCROLLINFO);
+                si.fMask = SIF_TRACKPOS;
+                ::GetScrollInfo(m_hWnd, SB_HORZ, &si);
+                m_CurrentLeftColumnNumber = si.nTrackPos;
+            }
+            break;
+
+        default:
+            break;
+        }
+
+        InvalidateRect(m_hWnd, nullptr, true);
+    }
+
+    void OnMouseWheel(WPARAM wParam, LPARAM)
+    {
+        auto lock = m_Lock.LockUnique();
+
+        m_CurrentTopLineNumber = std::clamp(
+            m_CurrentTopLineNumber - GET_WHEEL_DELTA_WPARAM(wParam) * 3 / WHEEL_DELTA,
+            0,
+            GetMaxTopLineNumber()
+        );
+
+        InvalidateRect(m_hWnd, nullptr, true);
+    }
+
+    void OnMouseHWheel(WPARAM wParam, LPARAM)
+    {
+        auto lock = m_Lock.LockUnique();
+
+        m_CurrentLeftColumnNumber = std::clamp(
+            m_CurrentLeftColumnNumber + GET_WHEEL_DELTA_WPARAM(wParam) * 4 / WHEEL_DELTA,
+            0,
+            GetMaxLeftColumnNumber()
+        );
+
+        InvalidateRect(m_hWnd, nullptr, true);
+    }
+
+    void OnLButtonDown(WPARAM, LPARAM lParam)
+    {
+        auto lock = m_Lock.LockShared();
+        m_SelStart = TextLocationFromPosition(GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam), true);
+        m_SelEnd = m_SelStart;
+
+        if (m_SelStart.has_value())
+        {
+            SetCapture(m_hWnd);
+            ::SetTimer(m_hWnd, DragScrollTimer, 100, nullptr);
+        }
+
+        InvalidateRect(m_hWnd, nullptr, true);
+    }
+
+    void OnLButtonUp(WPARAM, LPARAM lParam)
+    {
+        if (m_SelStart.has_value())
+        {
+            ::KillTimer(m_hWnd, DragScrollTimer);
+            ::ReleaseCapture();
+
+            auto lock = m_Lock.LockShared();
+            m_SelEnd = TextLocationFromPosition(GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam), false);
+
+            if (m_SelStart.has_value() && m_SelEnd.has_value())
+            {
+                if (std::make_pair(m_SelStart->Line, m_SelStart->Column) > std::make_pair(m_SelEnd->Line, m_SelEnd->Column))
+                {
+                    m_SelStart.swap(m_SelEnd);
+                }
+
+                CopyTextToClipboard(m_Buffer.CopySubstring(*m_SelStart, *m_SelEnd));
+            }
+            else
+            {
+                m_SelStart.reset();
+                m_SelEnd.reset();
+            }
+
+            InvalidateRect(m_hWnd, nullptr, true);
+        }
+    }
+
+    void OnMouseMove(WPARAM wParam, LPARAM lParam)
+    {
+        if (m_SelStart.has_value() && (wParam & MK_LBUTTON))
+        {
+            auto lock = m_Lock.LockShared();
+            m_SelEnd = TextLocationFromPosition(GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam), false);
+            InvalidateRect(m_hWnd, nullptr, true);
+        }
+    }
+
+    void OnKeyDown(WPARAM wParam, LPARAM)
+    {
+        auto lock = m_Lock.LockUnique();
+
+        switch (wParam)
+        {
+        case 'A':
+            if (::GetKeyState(VK_CONTROL) < 0)
+            {
+                m_SelStart = {0, 0};
+                std::size_t lastLine = std::max(static_cast<std::size_t>(0), m_Buffer.GetLineCount() - 1);
+                m_SelEnd = {lastLine, std::max(static_cast<std::size_t>(0), m_Buffer.GetColumnLength(lastLine) - 1)};
+                CopyTextToClipboard(m_Buffer.CopySubstring(*m_SelStart, *m_SelEnd));
+            }
+            break;
+
+        case VK_UP:
+        case 'K':
+        case 'P':
+            m_CurrentTopLineNumber = std::max(0, m_CurrentTopLineNumber - 1);
+            break;
+
+        case VK_DOWN:
+        case 'J':
+        case 'N':
+            m_CurrentTopLineNumber = std::min(m_CurrentTopLineNumber + 1, GetMaxTopLineNumber());
+            break;
+
+        case VK_LEFT:
+        case 'H':
+        case 'B':
+            m_CurrentLeftColumnNumber = std::max(0, m_CurrentLeftColumnNumber - 1);
+            break;
+
+        case VK_RIGHT:
+        case 'L':
+        case 'F':
+            m_CurrentLeftColumnNumber = std::min(m_CurrentLeftColumnNumber + 1, GetMaxLeftColumnNumber());
+            break;
+
+        case VK_PRIOR:
+            m_CurrentTopLineNumber = std::max(0, m_CurrentTopLineNumber - GetVisibleLinesInWindow());
+            break;
+
+        case VK_NEXT:
+            m_CurrentTopLineNumber = std::min(m_CurrentTopLineNumber + GetVisibleLinesInWindow(), GetMaxTopLineNumber());
+            break;
+
+        case VK_HOME:
+            m_CurrentTopLineNumber = 0;
+            break;
+
+        case VK_END:
+            m_CurrentTopLineNumber = GetMaxTopLineNumber();
+            break;
+
+        default:
+            break;
+        }
+
+        InvalidateRect(m_hWnd, nullptr, true);
+    }
+
+    void OnTimer(WPARAM wParam, LPARAM)
+    {
+        switch (wParam)
+        {
+        case RedrawTimer:
+            InvalidateRect(m_hWnd, nullptr, true);
+            break;
+
+        case DragScrollTimer:
+            {
+                POINT pt;
+                ::GetCursorPos(&pt);
+                ::ScreenToClient(m_hWnd, &pt);
+
+                auto lock = m_Lock.LockUnique();
+                DoDragScrollNoLock(pt.x, pt.y);
+
+                if (m_SelStart.has_value())
+                {
+                    m_SelEnd = TextLocationFromPosition(pt.x, pt.y, false);
+                    InvalidateRect(m_hWnd, nullptr, true);
+                }
+            }
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    // (Re)creates the off-screen drawing resources: a memory DC, a back-buffer bitmap
+    // sized to the current client area, the fixed-pitch console font and the background
+    // brush. Any previously created resources are released first. Also caches the font's
+    // height and average character width in m_FontHeight / m_FontWidth.
+    void InitializeGDIResource()
+    {
+        UninitializeGDIResource();
+
+        HDC hDC = ::GetDC(m_hWnd);
+        m_hBackDC = ::CreateCompatibleDC(hDC);
+        // Remember the pristine DC state so UninitializeGDIResource() can restore it
+        // before deleting the objects selected below.
+        m_SavedDCState = ::SaveDC(m_hBackDC);
+
+        RECT rc;
+        ::GetClientRect(m_hWnd, &rc);
+        // The bitmap is made compatible with the window DC, not the memory DC.
+        m_hBackBitmap = ::CreateCompatibleBitmap(hDC, rc.right - rc.left, rc.bottom - rc.top);
+        ::SelectObject(m_hBackDC, m_hBackBitmap);
+
+        ::ReleaseDC(m_hWnd, hDC);
+
+        // Both variants request a regular-weight, fixed-pitch font with height -14;
+        // only the character set and face name differ.
+#ifdef JAPANESE
+        // NOTE(review): the face-name bytes below appear as escaped Shift-JIS in this
+        // listing - confirm the literal against the real source file before editing it.
+        m_hFont = ::CreateFont(
+            -14,
+            0,
+            0,
+            0,
+            FW_REGULAR,
+            false,
+            false,
+            false,
+            SHIFTJIS_CHARSET,
+            OUT_DEFAULT_PRECIS,
+            CLIP_DEFAULT_PRECIS,
+            DEFAULT_QUALITY,
+            FIXED_PITCH | FF_DONTCARE,
+            _T("\82l\82\83S\83V\83b\83N")
+        );
+#else
+        m_hFont = ::CreateFont(
+            -14,
+            0,
+            0,
+            0,
+            FW_REGULAR,
+            false,
+            false,
+            false,
+            DEFAULT_CHARSET,
+            OUT_DEFAULT_PRECIS,
+            CLIP_DEFAULT_PRECIS,
+            DEFAULT_QUALITY,
+            FIXED_PITCH | FF_DONTCARE,
+            _T("Consolas")
+        );
+#endif
+        ::SelectObject(m_hBackDC, m_hFont);
+        ::SetBkColor(m_hBackDC, BackgroundColor);
+
+        // Cache the cell metrics used to convert between pixels and lines/columns.
+        TEXTMETRIC tm;
+        ::GetTextMetrics(m_hBackDC, &tm);
+        m_FontHeight = tm.tmHeight;
+        m_FontWidth = tm.tmAveCharWidth;
+
+        m_hBgBrush = ::CreateSolidBrush(BackgroundColor);
+    }
+
+    // Releases everything created by InitializeGDIResource() and resets the handles to
+    // null. Safe to call when nothing has been created yet.
+    void UninitializeGDIResource()
+    {
+        if (m_hBgBrush)
+        {
+            ::DeleteObject(m_hBgBrush);
+            m_hBgBrush = nullptr;
+        }
+
+        if (m_hBackDC)
+        {
+            // Restore the DC state saved before the font and bitmap were selected,
+            // then delete those objects and finally the DC itself.
+            ::RestoreDC(m_hBackDC, m_SavedDCState);
+            m_SavedDCState = 0;
+            ::DeleteObject(m_hFont);
+            m_hFont = nullptr;
+            ::DeleteObject(m_hBackBitmap);
+            m_hBackBitmap = nullptr;
+            ::DeleteDC(m_hBackDC);
+            m_hBackDC = nullptr;
+        }
+    }
+
+    // Largest valid value for m_CurrentTopLineNumber: the number of buffer lines that do
+    // not fit in the window, or 0 when everything fits.
+    int GetMaxTopLineNumber() const
+    {
+        const int hiddenLines = static_cast<int>(m_Buffer.GetLineCount() - GetVisibleLinesInWindow());
+        return hiddenLines > 0 ? hiddenLines : 0;
+    }
+
+    // Largest valid value for m_CurrentLeftColumnNumber: the number of columns of the
+    // longest line that do not fit in the window, or 0 when every line fits.
+    int GetMaxLeftColumnNumber() const
+    {
+        // Bounded at 0 like GetMaxTopLineNumber(): callers pass the result to
+        // std::min / std::clamp as an upper bound, and a negative bound would produce a
+        // negative scroll position (or undefined behavior in std::clamp).
+        return std::max(0, static_cast<int>(m_Buffer.GetMaxColumnLength() - GetVisileColumnsInWindow()));
+    }
+
+    // Number of whole text lines that fit in the client area.
+    // Returns 0 while the font height is not known.
+    int GetVisibleLinesInWindow() const
+    {
+        // m_FontHeight defaults to 0 and is only assigned in InitializeGDIResource();
+        // dividing by it before then would be an integer divide-by-zero.
+        if (m_FontHeight == 0)
+        {
+            return 0;
+        }
+
+        RECT rc = {};   // stays empty if GetClientRect fails (e.g. invalid m_hWnd)
+        ::GetClientRect(m_hWnd, &rc);
+        return (rc.bottom - rc.top) / m_FontHeight;
+    }
+
+    // Number of whole character columns that fit in the client area.
+    // Returns 0 while the font width is not known.
+    int GetVisileColumnsInWindow() const
+    {
+        // m_FontWidth defaults to 0 and is only assigned in InitializeGDIResource();
+        // dividing by it before then would be an integer divide-by-zero.
+        if (m_FontWidth == 0)
+        {
+            return 0;
+        }
+
+        RECT rc = {};   // stays empty if GetClientRect fails (e.g. invalid m_hWnd)
+        ::GetClientRect(m_hWnd, &rc);
+        return (rc.right - rc.left) / m_FontWidth;
+    }
+
+    void DoDragScrollNoLock(int x, int y)
+    {
+        RECT rc;
+        ::GetClientRect(m_hWnd, &rc);
+
+        if (x < rc.left)
+        {
+            m_CurrentLeftColumnNumber = std::max(0, m_CurrentLeftColumnNumber - 1);
+        }
+        else if (rc.right <= x)
+        {
+            m_CurrentLeftColumnNumber = std::min(m_CurrentLeftColumnNumber + 1, GetMaxLeftColumnNumber());
+        }
+
+        if (y < rc.top)
+        {
+            m_CurrentTopLineNumber = std::max(0, m_CurrentTopLineNumber - 1);
+        }
+        else if (rc.bottom <= y)
+        {
+            m_CurrentTopLineNumber = std::min(m_CurrentTopLineNumber + 1, GetMaxTopLineNumber());
+        }
+    }
+
+    // True when the view should follow newly appended text: no selection exists and the
+    // view is already scrolled to (or past) the last scroll position.
+    bool ShouldAutoScroll() const
+    {
+        if (m_SelStart.has_value() || m_SelEnd.has_value())
+        {
+            return false;
+        }
+
+        return m_CurrentTopLineNumber >= GetMaxTopLineNumber();
+    }
+
+    // Scrolls the view to the last scroll position (GetMaxTopLineNumber()).
+    // Does not take the lock and does not trigger a repaint.
+    void DoAutoScroll()
+    {
+        m_CurrentTopLineNumber = GetMaxTopLineNumber();
+    }
+
+    void UpdateScrollBarsNoLock()
+    {
+        SCROLLINFO siv = {};
+        SCROLLINFO sih = {};
+
+        siv.cbSize = sizeof(SCROLLINFO);
+        siv.fMask = SIF_ALL | SIF_DISABLENOSCROLL;
+        siv.nMin = 0;
+        siv.nMax = m_Buffer.GetLineCount() - 1;
+        siv.nPage = static_cast<UINT>(GetVisibleLinesInWindow());
+        siv.nPos = m_CurrentTopLineNumber;
+
+        sih.cbSize = sizeof(SCROLLINFO);
+        sih.fMask = SIF_ALL | SIF_DISABLENOSCROLL;
+        sih.nMin = 0;
+        sih.nMax = m_Buffer.GetMaxColumnLength() - 1;
+        sih.nPage = static_cast<UINT>(GetVisileColumnsInWindow());
+        sih.nPos = m_CurrentLeftColumnNumber;
+
+        ::SetScrollInfo(m_hWnd, SB_VERT, &siv, true);
+        ::SetScrollInfo(m_hWnd, SB_HORZ, &sih, true);
+    }
+
+    std::optional<TextLocationInfo> TextLocationFromPosition(int x, int y, bool exact) const
+    {
+        int line = m_CurrentTopLineNumber + y / m_FontHeight;
+
+        if (!exact)
+        {
+            line = std::clamp(line, 0, std::max(static_cast<int>(m_Buffer.GetLineCount() - 1), 0));
+        }
+
+        if (0 <= line && line < m_Buffer.GetLineCount())
+        {
+            int col = m_CurrentLeftColumnNumber + x / m_FontWidth;
+
+            if (!exact)
+            {
+                col = std::clamp(col, 0, std::max(static_cast<int>(m_Buffer.GetColumnLength(line) - 1), 0));
+            }
+
+            if (0 <= col && col < m_Buffer.GetColumnLength(line))
+            {
+                return TextLocationInfo{static_cast<std::size_t>(line), static_cast<std::size_t>(col)};
+            }
+        }
+
+        return std::nullopt;
+    }
+
+    // Inverse of TextLocationFromPosition: returns the client-area pixel position (x, y)
+    // of the top-left corner of the character cell at loc, relative to the current
+    // scroll offsets.
+    std::pair<int, int> PositionFromTextLocation(TextLocationInfo loc) const
+    {
+        const auto pixelX = (loc.Column - m_CurrentLeftColumnNumber) * m_FontWidth;
+        const auto pixelY = (loc.Line - m_CurrentTopLineNumber) * m_FontHeight;
+
+        return {static_cast<int>(pixelX), static_cast<int>(pixelY)};
+    }
+
+    StyledTextBuffer& m_Buffer;
+    std::optional<TextLocationInfo> m_SelStart;
+    std::optional<TextLocationInfo> m_SelEnd;   // inclusive
+    SRWLock m_Lock;
+
+    HWND m_hWnd = nullptr;
+
+    // scroll info
+    int m_CurrentTopLineNumber = 0;    // line # of the top line of the window
+    int m_CurrentLeftColumnNumber = 0;
+
+    HDC m_hBackDC = nullptr;
+    int m_SavedDCState = 0;
+    HBITMAP m_hBackBitmap = nullptr;
+    HFONT m_hFont = nullptr;
+    int m_FontHeight = 0;
+    int m_FontWidth = 0;   // monospace font only
+    HBRUSH m_hBgBrush = nullptr;
+};
+
+} // namespace TimW32gNewConsole
+
+// Clears the console: the live console window, if one is attached to the IDC_EDIT control
+// of hConsoleWnd, and in every case the global console buffer.
+extern "C" void ClearNewConsoleBuffer(void)
+{
+    TimW32gNewConsole::NewConsoleWindow* pConsoleWindow = nullptr;
+
+    if (::IsWindow(hConsoleWnd))
+    {
+        // The window object pointer is kept in the control's GWLP_USERDATA slot.
+        const HWND hEdit = ::GetDlgItem(hConsoleWnd, IDC_EDIT);
+        pConsoleWindow = reinterpret_cast<TimW32gNewConsole::NewConsoleWindow*>(
+            ::GetWindowLongPtr(hEdit, GWLP_USERDATA)
+        );
+    }
+
+    if (pConsoleWindow)
+    {
+        pConsoleWindow->Clear();
+    }
+
+    TimW32gNewConsole::GlobalNewConsoleBuffer.Clear();
+}
+
+extern "C" void NewConsoleBufferWriteCMsg(int type, int verbosity_level, LPCTSTR str)
+{
+    COLORREF color = TimW32gNewConsole::NormalColor;
+
+    if (type == CMSG_FATAL || type == CMSG_ERROR)
+    {
+        color = TimW32gNewConsole::ErrorColor;
+    }
+    else if (type == CMSG_WARNING)
+    {
+        color = TimW32gNewConsole::WarningColor;
+    }
+    else if (type == CMSG_INFO && verbosity_level <= VERB_NORMAL)
+    {
+        color = TimW32gNewConsole::InfoColor;
+    }
+
+    if (::IsWindow(hConsoleWnd))
+    {
+        auto pConsoleWindow = reinterpret_cast<TimW32gNewConsole::NewConsoleWindow*>(
+            ::GetWindowLongPtr(::GetDlgItem(hConsoleWnd, IDC_EDIT), GWLP_USERDATA)
+        );
+
+        if (pConsoleWindow)
+        {
+            pConsoleWindow->Write(color, str, true);
+            return;
+        }
+    }
+
+    TimW32gNewConsole::GlobalNewConsoleBuffer.Append(color, str);
+    TimW32gNewConsole::GlobalNewConsoleBuffer.AppendNewline();
+}
+
+extern "C" void InitializeNewConsole(void)
+{
+    WNDCLASSEX wc = {};
+    wc.cbSize = sizeof(wc);
+
+    if (!::GetClassInfoEx(::GetModuleHandle(nullptr), TimW32gNewConsole::pClassName, &wc))
+    {
+        wc.style = CS_HREDRAW | CS_VREDRAW;
+        wc.lpfnWndProc = &TimW32gNewConsole::NewConsoleWindow::WindowProc;
+        wc.cbClsExtra = 0;
+        wc.cbWndExtra = 0;
+        wc.hInstance = ::GetModuleHandle(nullptr);
+        wc.hIcon = nullptr;
+        wc.hCursor = ::LoadCursor(nullptr, IDC_ARROW);
+        wc.hbrBackground = nullptr;
+        wc.lpszMenuName = nullptr;
+        wc.lpszClassName = TimW32gNewConsole::pClassName;
+        wc.hIconSm = nullptr;
+
+        ::RegisterClassEx(&wc);
+    }
+}
+
+// Clears the console window object attached to hwnd (stored in GWLP_USERDATA).
+// Does nothing if no object is attached.
+extern "C" void NewConsoleClear(HWND hwnd)
+{
+    const LONG_PTR userData = ::GetWindowLongPtr(hwnd, GWLP_USERDATA);
+
+    if (auto* pConsoleWindow = reinterpret_cast<TimW32gNewConsole::NewConsoleWindow*>(userData))
+    {
+        pConsoleWindow->Clear();
+    }
+}
+
+// Writes str to the console window object attached to hwnd (stored in GWLP_USERDATA).
+// Does nothing if no object is attached.
+extern "C" void NewConsoleWrite(HWND hwnd, LPCTSTR str)
+{
+    const LONG_PTR userData = ::GetWindowLongPtr(hwnd, GWLP_USERDATA);
+
+    if (auto* pConsoleWindow = reinterpret_cast<TimW32gNewConsole::NewConsoleWindow*>(userData))
+    {
+        pConsoleWindow->Write(str);
+    }
+}
+
+// Forwards a format string and its va_list to the console window object attached to hwnd
+// (stored in GWLP_USERDATA). Does nothing if no object is attached.
+extern "C" void NewConsoleWriteV(HWND hwnd, LPCTSTR format, va_list args)
+{
+    const LONG_PTR userData = ::GetWindowLongPtr(hwnd, GWLP_USERDATA);
+
+    if (auto* pConsoleWindow = reinterpret_cast<TimW32gNewConsole::NewConsoleWindow*>(userData))
+    {
+        pConsoleWindow->WriteV(format, args);
+    }
+}
diff --git a/interface/w32g_new_console.h b/interface/w32g_new_console.h
new file mode 100644 (file)
index 0000000..db4f8b3
--- /dev/null
@@ -0,0 +1,14 @@
+
+#pragma once
+
+#ifdef TIMW32G_USE_NEW_CONSOLE
+
+/*
+ * C interface to the new console window.
+ * The functions are defined with C linkage in the C++ implementation file, so the
+ * declarations are wrapped in extern "C" to stay correct when seen by a C++ compiler.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Clears the live console window (if any) and the global console buffer. */
+void ClearNewConsoleBuffer(void);
+/* Writes one control message, colored by type / verbosity, to the window or the buffer. */
+void NewConsoleBufferWriteCMsg(int type, int verbosity_level, LPCTSTR str);
+
+/* Registers the console window class if it is not registered yet. */
+void InitializeNewConsole(void);
+/* The following act on the console window object attached to hwnd; no-ops if none. */
+void NewConsoleClear(HWND hwnd);
+void NewConsoleWrite(HWND hwnd, LPCTSTR str);
+void NewConsoleWriteV(HWND hwnd, LPCTSTR format, va_list args);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TIMW32G_USE_NEW_CONSOLE
index ef4d552..8a570f3 100644 (file)
@@ -605,7 +605,6 @@ void w32g_update_playlist_pos(int pos)
                PlayListEntry *entry = &playlist_ctrl->list[i];
                w32g_get_midi_file_info_post(entry);
                {
-                       char *title;
                        volatile LVITEM lvi0;
                        lvi0.iItem = i;
                        lvi0.iSubItem = 0;
@@ -1166,7 +1165,6 @@ void w32g_rotate_playlist(int dest)
     HWND hList;
     PlayListEntry save;
 #ifdef LISTVIEW_PLAYLIST
-       volatile LVITEM lvi[6];
 #else
        char temp[1024];
 #endif
@@ -1406,27 +1404,38 @@ void w32g_free_playlist(void)
        for(j=0; j < PLAYLIST_MAX; j++){
                for(i=0; i < playlist[j].nfiles; i++){
                        entry = &playlist[j].list[i];
-                       if(entry->filepath != NULL) free(entry->filepath);
+            free(entry->filepath);
+            entry->filepath = NULL;
 #ifdef LISTVIEW_PLAYLIST
-                       if(entry->duration != NULL) free(entry->duration);
-                       if(entry->filetype != NULL) free(entry->filetype);
-                       if(entry->system != NULL) free(entry->system);
+            free(entry->duration);
+            entry->duration = NULL;
+                       free(entry->filetype);
+            entry->filetype = NULL;
+                       free(entry->system);
+            entry->system = NULL;
 #endif
                }
-               if(playlist[j].list != NULL) free(playlist[j].list);
+               free(playlist[j].list);
+        playlist[j].list = NULL;
        }
-       if(playlist_shuffle.list != NULL) free(playlist_shuffle.list);
+       free(playlist_shuffle.list);
+    playlist_shuffle.list = NULL;
        
 #ifdef LISTVIEW_PLAYLIST
        // clear tmp_playlist
        for(i = 0; i < tmp_playlist.nfiles; i++){
                entry = &tmp_playlist.list[i];
-               if(entry->filepath != NULL) free(entry->filepath);
-               if(entry->duration != NULL) free(entry->duration);
-               if(entry->filetype != NULL) free(entry->filetype);
-               if(entry->system != NULL) free(entry->system);
+               free(entry->filepath);
+        entry->filepath = NULL;
+               free(entry->duration);
+        entry->duration = NULL;
+               free(entry->filetype);
+        entry->filetype = NULL;
+               free(entry->system);
+        entry->system = NULL;
        }
-       if(tmp_playlist.list != NULL) free(tmp_playlist.list);
+       free(tmp_playlist.list);
+    tmp_playlist.list = NULL;
 #endif
 }
 
index c242f42..55d3ac0 100644 (file)
@@ -796,6 +796,9 @@ static void PrefSettingApply(void)
 #ifdef INT_SYNTH
        init_int_synth();
 #endif // INT_SYNTH
+#ifdef ENABLE_SFZ
+       init_sfz();
+#endif
        initialize_resampler_coeffs();
 
 #ifdef SUPPORT_SOUNDSPEC
@@ -4920,10 +4923,12 @@ static const TCHAR *cb_info_IDC_COMBO_REVC_EX_RV_NUM[] = {
 // IDC_COMBO_REVC_EX_AP_NUM
 static int cb_info_IDC_COMBO_REVC_EX_AP_NUM_num[] = {
        0,
+       4,
        8,
 };
 static const TCHAR *cb_info_IDC_COMBO_REVC_EX_AP_NUM[] = {
        TEXT("0"),
+       TEXT("4"),
        TEXT("8"),
 };
 
@@ -8562,14 +8567,6 @@ static const TCHAR *cb_info_IDC_COMBO_WASAPI_STREAM_CATEGORY[] = {
     TEXT("Media"),
 };
 
-#define cb_num_IDC_COMBO_WASAPI_STREAM_OPTION 3
-static const TCHAR *cb_info_IDC_COMBO_WASAPI_STREAM_OPTION[] = {
-    TEXT("None"),
-    TEXT("Raw"),
-    TEXT("MatchFormat"),
-};
-
-
 LRESULT WINAPI wasapiConfigDialogProc(HWND hwnd, UINT msg, WPARAM wp, LPARAM lp)
 {
        int i = 0, cb_num = 0, cb_sel = 0, flag;
@@ -8628,21 +8625,18 @@ LRESULT WINAPI wasapiConfigDialogProc(HWND hwnd, UINT msg, WPARAM wp, LPARAM lp)
                                DI_DISABLE(IDC_COMBO_WASAPI_STREAM_CATEGORY);
                        }
                        // WASAPI Stream Option
-                       if(winver >= 6) // win10
-                               max = cb_num_IDC_COMBO_WASAPI_STREAM_OPTION;
-                       else if(winver >= 4) // win8.1
-                               max = 2;
-                       else
-                               max = 1;
-                       for (i = 0; i < max; i++)
-                               CB_INSSTR(IDC_COMBO_WASAPI_STREAM_OPTION, cb_info_IDC_COMBO_WASAPI_STREAM_OPTION[i]);
                        if(winver >= 6){ // win10
-                               CB_SET(IDC_COMBO_WASAPI_STREAM_OPTION, (st_temp->wasapi_stream_option));
+                               SendDlgItemMessage(hwnd, IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_RAW, BM_SETCHECK, (st_temp->wasapi_stream_option & 1) ? BST_CHECKED : BST_UNCHECKED, 0);
+                               SendDlgItemMessage(hwnd, IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_MATCH_FORMAT, BM_SETCHECK, (st_temp->wasapi_stream_option & 2) ? BST_CHECKED : BST_UNCHECKED, 0);
+                               SendDlgItemMessage(hwnd, IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_AMBISONICS, BM_SETCHECK, (st_temp->wasapi_stream_option & 4) ? BST_CHECKED : BST_UNCHECKED, 0);
                        }else if(winver >= 4){ // win8.1
-                               CB_SET(IDC_COMBO_WASAPI_STREAM_OPTION, (st_temp->wasapi_stream_option >= 2 ? 0 : st_temp->wasapi_stream_option));
+                               SendDlgItemMessage(hwnd, IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_RAW, BM_SETCHECK, (st_temp->wasapi_stream_option & 1) ? BST_CHECKED : BST_UNCHECKED, 0);
+                               DI_DISABLE(IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_MATCH_FORMAT);
+                               DI_DISABLE(IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_AMBISONICS);
                        }else{
-                               CB_SET(IDC_COMBO_WASAPI_STREAM_OPTION, 0);
-                               DI_DISABLE(IDC_COMBO_WASAPI_STREAM_OPTION);
+                               DI_DISABLE(IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_RAW);
+                               DI_DISABLE(IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_MATCH_FORMAT);
+                               DI_DISABLE(IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_AMBISONICS);
                        }
 
                        SetFocus(DI_GET(IDOK));
@@ -8694,7 +8688,13 @@ LRESULT WINAPI wasapiConfigDialogProc(HWND hwnd, UINT msg, WPARAM wp, LPARAM lp)
                                // WASAPI Stream Category
                                st_temp->wasapi_stream_category = CB_GET(IDC_COMBO_WASAPI_STREAM_CATEGORY);
                                // WASAPI Stream Option
-                               st_temp->wasapi_stream_option = CB_GET(IDC_COMBO_WASAPI_STREAM_OPTION);
+                               st_temp->wasapi_stream_option = 0;
+                               if (SendDlgItemMessage(hwnd, IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_RAW, BM_GETCHECK, 0, 0))
+                                       st_temp->wasapi_stream_option |= 1;
+                               if (SendDlgItemMessage(hwnd, IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_MATCH_FORMAT, BM_GETCHECK, 0, 0))
+                                       st_temp->wasapi_stream_option |= 2;
+                               if (SendDlgItemMessage(hwnd, IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_AMBISONICS, BM_GETCHECK, 0, 0))
+                                       st_temp->wasapi_stream_option |= 4;
 
                                EndDialog(hwnd,TRUE);
                                break;
index 45dca32..c4a12d4 100644 (file)
 #define IDC_BUTTON_EXITPROCESS         2602
 #define IDC_BUTTON_HEAP_CHECK          2603
 #define IDC_BUTTON_VARIABLES_CHECK     2604
+#define IDC_BUTTON_VAR_ENTER           2640
+#define IDC_EDIT_VAR0                  2650
+#define IDC_EDIT_VAR1                  2651
+#define IDC_EDIT_VAR2                  2652
+#define IDC_EDIT_VAR3                  2653
+#define IDC_EDIT_VAR4                  2654
+#define IDC_EDIT_VAR5                  2655
+#define IDC_EDIT_VAR6                  2656
+#define IDC_EDIT_VAR7                  2657
+#define IDC_EDIT_VAR8                  2658
+#define IDC_EDIT_VAR9                  2659
 
 //// player
 // cfg
 #define IDC_RADIOBUTTON_WASAPI_POLLING  8056
 #define IDC_COMBO_WASAPI_PRIORITY       8058
 #define IDC_COMBO_WASAPI_STREAM_CATEGORY  8060
-#define IDC_COMBO_WASAPI_STREAM_OPTION  8061
-#define IDC_EDIT_WASAPI_LATENCY         8062
-#define IDC_STATIC_WASAPI_LATENCY_MIN   8063
-#define IDC_STATIC_WASAPI_LATENCY_MAX   8064
+#define IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_RAW  8061
+#define IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_MATCH_FORMAT  8062
+#define IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_AMBISONICS  8063
+#define IDC_EDIT_WASAPI_LATENCY         8064
+#define IDC_STATIC_WASAPI_LATENCY_MIN   8065
+#define IDC_STATIC_WASAPI_LATENCY_MAX   8066
 // portaudio
 #define IDC_COMBO_PA_ASIO_DEV           8100
 #define IDC_BUTTON_PA_ASIO_CONFIG       8101
index afa1b5e..29d76b7 100644 (file)
@@ -11,6 +11,7 @@
 \r
 \r
 \r
+\r
 //\r
 // Bitmap resources\r
 //\r
@@ -171,7 +172,7 @@ FONT 8, "Tahoma", 400, 0, 0
 \r
 \r
 LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL\r
-IDD_DIALOG_DEBUG DIALOG 0, 0, 240, 240\r
+IDD_DIALOG_DEBUG DIALOG 0, 0, 309, 239\r
 STYLE DS_SETFONT | WS_CAPTION | WS_SYSMENU\r
 CAPTION "Debug Window (TiMidity++ Win32GUI)"\r
 FONT 8, "Tahoma"\r
@@ -185,6 +186,17 @@ FONT 8, "Tahoma"
     PUSHBUTTON      "Heaps Check", IDC_BUTTON_HEAP_CHECK, 4, 188, 50, 16, BS_CENTER\r
     PUSHBUTTON      "Variables Check", IDC_BUTTON_VARIABLES_CHECK, 60, 188, 50, 16, BS_CENTER\r
     EDITTEXT        IDC_EDIT, 0, 0, 240, 160, WS_HSCROLL | WS_VSCROLL | ES_AUTOHSCROLL | ES_AUTOVSCROLL | ES_MULTILINE | ES_NOHIDESEL | ES_READONLY | ES_WANTRETURN\r
+    EDITTEXT        IDC_EDIT_VAR0, 260, 5, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    EDITTEXT        IDC_EDIT_VAR1, 260, 20, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    EDITTEXT        IDC_EDIT_VAR2, 260, 35, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    EDITTEXT        IDC_EDIT_VAR3, 260, 50, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    EDITTEXT        IDC_EDIT_VAR4, 260, 65, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    EDITTEXT        IDC_EDIT_VAR5, 260, 80, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    EDITTEXT        IDC_EDIT_VAR6, 260, 95, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    EDITTEXT        IDC_EDIT_VAR7, 260, 110, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    EDITTEXT        IDC_EDIT_VAR8, 260, 125, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    EDITTEXT        IDC_EDIT_VAR9, 260, 140, 35, 12, ES_AUTOHSCROLL | ES_NUMBER\r
+    PUSHBUTTON      "Var Enter", IDC_BUTTON_VAR_ENTER, 260, 165, 35, 14, BS_CENTER\r
 }\r
 \r
 \r
@@ -551,7 +563,7 @@ CAPTION "TiMidity++ Win32GUI 
 FONT 9, "MS UI Gothic"\r
 {\r
     DEFPUSHBUTTON   "OK", IDOK, 100, 330, 50, 15\r
-    PUSHBUTTON      "·¬Ý¾Ù", IDCANCEL, 155, 330, 50, 15\r
+    PUSHBUTTON      "\83L\83\83\83\93\83Z\83\8b", IDCANCEL, 155, 330, 50, 15\r
     PUSHBUTTON      "\93K\97p", IDC_BUTTON_APPLY, 210, 330, 50, 15\r
     CONTROL         "Tab1", IDC_TAB_MAIN, WC_TABCONTROL, WS_GROUP | WS_TABSTOP | TCS_FORCELABELLEFT | TCS_HOTTRACK | TCS_RAGGEDRIGHT, 4, 25, 355, 300\r
     PUSHBUTTON      "INI\83t\83@\83C\83\8b", IDC_BUTTON_INI_FILE, 5, 5, 50, 15, BS_CENTER\r
@@ -751,7 +763,7 @@ FONT 8, "Tahoma"
 \r
 \r
 LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL\r
-IDD_DIALOG_WASAPI DIALOGEX 0, 0, 215, 200\r
+IDD_DIALOG_WASAPI DIALOGEX 0, 0, 215, 220\r
 STYLE DS_MODALFRAME | DS_SETFONT | WS_CAPTION | WS_POPUP | WS_SYSMENU\r
 CAPTION "WASAPI Config"\r
 FONT 9, "MS UI Gothic", 0, 0, 1\r
@@ -766,7 +778,9 @@ FONT 9, "MS UI Gothic", 0, 0, 1
     LTEXT           "Stream Category", IDC_STATIC, 5, 155, 50, 8, SS_LEFT\r
     COMBOBOX        IDC_COMBO_WASAPI_STREAM_CATEGORY, 60, 153, 100, 120, WS_TABSTOP | WS_VSCROLL | CBS_DROPDOWNLIST | CBS_SORT\r
     LTEXT           "Stream Option", IDC_STATIC, 5, 170, 43, 8, SS_LEFT\r
-    COMBOBOX        IDC_COMBO_WASAPI_STREAM_OPTION, 60, 168, 100, 54, WS_TABSTOP | WS_VSCROLL | CBS_DROPDOWNLIST | CBS_SORT\r
+    AUTOCHECKBOX    "Raw", IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_RAW, 60, 170, 100, 8\r
+    AUTOCHECKBOX    "Match Format", IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_MATCH_FORMAT, 60, 185, 100, 8\r
+    AUTOCHECKBOX    "Ambisonics", IDC_CHECKBOX_WASAPI_STREAM_OPTIONS_AMBISONICS, 60, 200, 100, 8\r
     GROUPBOX        "Format", IDC_STATIC, 60, 35, 105, 45, WS_GROUP\r
     AUTORADIOBUTTON "WAVEFORMATEX", IDC_RADIOBUTTON_WASAPI_FORMAT_EX, 65, 50, 69, 8, WS_GROUP | WS_TABSTOP\r
     AUTORADIOBUTTON "WAVEFORMATEXTENSIBLE", IDC_RADIOBUTTON_WASAPI_FORMAT_EXT, 65, 65, 95, 8, WS_TABSTOP\r
@@ -3309,9 +3323,9 @@ FONT 9, "MS UI Gothic"
     AUTOCHECKBOX    "\89¹\97Ê\8e©\93®\95â\90³", IDC_CHECKBOX_AMP_COMPENSATION, 240, 110, 75, 10\r
     LTEXT           "\83{\83C\83X", IDC_STATIC, 240, 20, 59, 8, SS_LEFT\r
     AUTOCHECKBOX    "\83A\83\93\83`\83G\83C\83\8a\83A\83X", IDC_CHECKBOX_ANTIALIAS, 240, 190, 70, 10, BS_LEFT | BS_TOP\r
-    LTEXT           "\95â\8a®\8f\88\97\9d", IDC_STATIC, 240, 220, 35, 8, SS_LEFT\r
+    LTEXT           "\95â\8aÔ\8f\88\97\9d", IDC_STATIC, 240, 220, 35, 8, SS_LEFT\r
     COMBOBOX        IDC_COMBO_RESAMPLE, 291, 218, 50, 125, WS_TABSTOP | WS_VSCROLL | CBS_DROPDOWNLIST | CBS_SORT\r
-    LTEXT           "\95â\8a®\83p\83\89\83\81\81[\83^", IDC_STATIC, 240, 235, 44, 8, SS_LEFT\r
+    LTEXT           "\95â\8aÔ\83p\83\89\83\81\81[\83^", IDC_STATIC, 240, 235, 44, 8, SS_LEFT\r
     COMBOBOX        IDC_COMBO_MIDI_TYPE, 10, 18, 100, 200, WS_TABSTOP | WS_VSCROLL | CBS_DROPDOWNLIST | CBS_SORT\r
     GROUPBOX        "\94g\8c`\92²\90®", IDC_STATIC, 235, 175, 110, 105\r
     AUTOCHECKBOX    "\83T\83X\83e\83B\83\93\90§\8cÀ (ms)", IDC_CHECKBOX_DECAY, 125, 215, 70, 8\r
@@ -3323,7 +3337,7 @@ FONT 9, "MS UI Gothic"
     CONTROL         "", IDC_SLIDER_OUTPUT_AMP, TRACKBAR_CLASS, WS_TABSTOP | TBS_AUTOTICKS | TBS_TOP | TBS_NOTICKS, 240, 50, 100, 7\r
     CONTROL         "", IDC_SLIDER_CHORUS, TRACKBAR_CLASS, WS_TABSTOP | TBS_AUTOTICKS | TBS_TOP | TBS_NOTICKS, 125, 65, 100, 7\r
     CONTROL         "", IDC_SLIDER_REVERB, TRACKBAR_CLASS, WS_TABSTOP | TBS_AUTOTICKS | TBS_TOP | TBS_NOTICKS, 125, 35, 100, 7\r
-    LTEXT           "\95â\8a®\83t\83B\83\8b\83^", IDC_STATIC, 240, 250, 34, 8, SS_LEFT\r
+    LTEXT           "\95â\8aÔ\83t\83B\83\8b\83^", IDC_STATIC, 240, 250, 34, 8, SS_LEFT\r
     COMBOBOX        IDC_COMBO_RESAMPLE_FILTER, 290, 248, 50, 200, WS_TABSTOP | WS_VSCROLL | CBS_DROPDOWNLIST | CBS_SORT\r
     LTEXT           "\92Ç\89Á\8dÄ\90\8e\9e\8aÔ(sec)", IDC_STATIC, 240, 125, 57, 8, SS_LEFT\r
     LTEXT           "emu_delay_time(ms)", IDC_STATIC, 240, 155, 59, 8, SS_LEFT\r
@@ -3711,7 +3725,7 @@ FONT 9, "MS UI Gothic"
     LTEXT           "99999.9999", IDC_EDIT_COMPUTE_BUFFER_TIME, 185, 35, 35, 8, SS_LEFT\r
     LTEXT           "99999.9999", IDC_EDIT_DEVICE_BUFFER_TIME, 185, 120, 35, 8, SS_LEFT\r
     LTEXT           "\83u\83\8d\83b\83N\8e\9e\8aÔ (ms)", IDC_STATIC, 125, 120, 55, 8, SS_LEFT\r
-    AUTOCHECKBOX    "\89¹\90F\93Ç\8d\9e\8e\9e\95â\8a®\8f\88\97\9d", IDC_CHECKBOX_PRE_RESAMPLE, 240, 180, 78, 8\r
+    AUTOCHECKBOX    "\89¹\90F\93Ç\8d\9e\8e\9e\95â\8aÔ\8f\88\97\9d", IDC_CHECKBOX_PRE_RESAMPLE, 240, 180, 78, 8\r
     AUTOCHECKBOX    "\8bN\93®\8e\9e\82É\91S\89¹\90F\93Ç\8d\9e", IDC_CHECKBOX_LOAD_ALL_INST, 240, 224, 76, 8, BS_LEFT | BS_TOP\r
     LTEXT           "\83T\83C\83Y (sample)", IDC_STATIC, 125, 50, 50, 8, SS_LEFT\r
     LTEXT           "99999.9999", IDC_EDIT_COMPUTE_BUFFER_SIZE, 185, 50, 35, 8, SS_LEFT\r
index 03eb7cf..4b50608 100644 (file)
@@ -118,7 +118,7 @@ static void ConsoleWndVerbosityUpdate(void);
 static void ConsoleWndVerbosityApply(void);
 static void ConsoleWndValidUpdate(void);
 static void ConsoleWndValidApply(void);
-static void ConsoleWndVerbosityApplyIncDec(int num);
+static void ConsoleWndVerbosityApplySet(int num);
 static int ConsoleWndInfoReset(HWND hwnd);
 static int ConsoleWndInfoApply(void);
 
@@ -136,6 +136,10 @@ void InitConsoleWnd(HWND hParentWnd)
 ///r
        ConsoleWndInfoReset(hConsoleWnd);
        INILoadConsoleWnd();
+       
+#ifdef TIMW32G_USE_NEW_CONSOLE
+       InitializeNewConsole();
+#endif
 
        switch(PlayerLanguage){
        case LANGUAGE_ENGLISH:
@@ -156,9 +160,11 @@ void InitConsoleWnd(HWND hParentWnd)
        INILoadConsoleWnd();
        ConsoleWndInfoApply();
        UpdateWindow(hConsoleWnd);
-       ConsoleWndVerbosityApplyIncDec(0);
+       ConsoleWndVerbosityApply();
        CheckDlgButton(hConsoleWnd, IDC_CHECKBOX_VALID, ConsoleWndFlag);
+#ifndef TIMW32G_USE_NEW_CONSOLE
        Edit_LimitText(GetDlgItem(hConsoleWnd,IDC_EDIT), ConsoleWndMaxSize);
+#endif
 }
 
 // Window Procedure
@@ -187,10 +193,18 @@ ConsoleWndProc(HWND hwnd, UINT uMess, WPARAM wParam, LPARAM lParam)
                        ConsoleWndVerbosityApply();
                        break;
                case IDC_BUTTON_INC:
-                       ConsoleWndVerbosityApplyIncDec(1);
+                       {
+                               int n = (int)GetDlgItemInt(hwnd, IDC_EDIT_VERBOSITY, NULL, TRUE);
+                               n++;
+                               ConsoleWndVerbosityApplySet(n);
+                       }
                        break;
                case IDC_BUTTON_DEC:
-                       ConsoleWndVerbosityApplyIncDec(-1);
+                       {
+                               int n = (int)GetDlgItemInt(hwnd, IDC_EDIT_VERBOSITY, NULL, TRUE);
+                               n--;
+                               ConsoleWndVerbosityApplySet(n);
+                       }
                        break;
                default:
                        break;
@@ -344,12 +358,21 @@ ConsoleWndProc(HWND hwnd, UINT uMess, WPARAM wParam, LPARAM lParam)
 //             ShowWindow(hConsoleWnd, SW_HIDE);
                MainWndUpdateConsoleButton();
                break;
+#ifdef TIMW32G_USE_NEW_CONSOLE
+       case WM_ACTIVATE:
+               if (LOWORD(wParam) != WA_INACTIVE) {
+                       SetFocus(GetDlgItem(hConsoleWnd, IDC_EDIT));
+                       return TRUE;
+               }
+               break;
+#else
        case WM_SETFOCUS:
                HideCaret(hwnd);
                break;
        case WM_KILLFOCUS:
                ShowCaret(hwnd);
                break;
+#endif
        default:
                return FALSE;
        }
@@ -363,7 +386,11 @@ void PutsConsoleWnd(char *str)
        if(!IsWindow(hConsoleWnd) || !ConsoleWndFlag)
                return;
        hwnd = GetDlgItem(hConsoleWnd,IDC_EDIT);
+#ifdef TIMW32G_USE_NEW_CONSOLE
+       NewConsoleWrite(hwnd, str);
+#else
        PutsEditCtlWnd(hwnd,str);
+#endif
 }
 
 // printf()
@@ -375,7 +402,11 @@ void PrintfConsoleWnd(char *fmt, ...)
                return;
        hwnd = GetDlgItem(hConsoleWnd,IDC_EDIT);
        va_start(ap, fmt);
+#ifdef TIMW32G_USE_NEW_CONSOLE
+       NewConsoleWriteV(hwnd, fmt, ap);
+#else
        VprintfEditCtlWnd(hwnd,fmt,ap);
+#endif
        va_end(ap);
 }
 
@@ -386,7 +417,11 @@ void ClearConsoleWnd(void)
        if(!IsWindow(hConsoleWnd))
                return;
        hwnd = GetDlgItem(hConsoleWnd,IDC_EDIT);
+#ifdef TIMW32G_USE_NEW_CONSOLE
+       NewConsoleClear(hwnd);
+#else
        ClearEditCtlWnd(hwnd);
+#endif
 }
 
 // ---------------------------------------------------------------------------
@@ -397,7 +432,9 @@ static void ConsoleWndAllUpdate(void)
        ConsoleWndVerbosityUpdate();
        ConsoleWndValidUpdate();
        Edit_LimitText(GetDlgItem(hConsoleWnd,IDC_EDIT_VERBOSITY),3);
+#ifndef TIMW32G_USE_NEW_CONSOLE
        Edit_LimitText(GetDlgItem(hConsoleWnd,IDC_EDIT),ConsoleWndMaxSize);
+#endif
 }
 
 static void ConsoleWndValidUpdate(void)
@@ -432,10 +469,10 @@ static void ConsoleWndVerbosityApply(void)
        ConsoleWndVerbosityUpdate();
 }
 
-static void ConsoleWndVerbosityApplyIncDec(int num)
+static void ConsoleWndVerbosityApplySet(int num) // set ctl->verbosity to the absolute level num (the old IncDec variant added num to it)
 {
        if(!IsWindow(hConsoleWnd)) return;
-       ctl->verbosity += num;
+       ctl->verbosity = num; // overwrite, do not accumulate; RANGE() below presumably clamps to [-1, 4] - TODO confirm macro
        RANGE(ctl->verbosity, -1, 4);
        ConsoleWndVerbosityUpdate();
 }
index ad48485..aa45f15 100644 (file)
                                RelativePath="..\interface\w32g_utl.c"
                                >
                        </File>
+                       <File
+                               RelativePath="..\timidity\w32g_vorbis_dll.c"
+                               >
+                       </File>
+                       <File
+                               RelativePath="..\timidity\w32g_vorbisfile_dll.c"
+                               >
+                       </File>
                        <Filter
                                Name="timidity"
                                >
                                        >
                                </File>
                                <File
+                                       RelativePath="..\timidity\decode.c"
+                                       >
+                               </File>
+                               <File
                                        RelativePath="..\timidity\effect.c"
                                        >
                                </File>
index fd90ad2..ac154a1 100644 (file)
     <ClCompile Include="..\timidity\audio_cnv.c" />
     <ClCompile Include="..\timidity\common.c" />
     <ClCompile Include="..\timidity\controls.c" />
+    <ClCompile Include="..\timidity\decode.c" />
     <ClCompile Include="..\timidity\effect.c" />
     <ClCompile Include="..\timidity\envelope.c" />
     <ClCompile Include="..\timidity\filter.c" />
     <ClCompile Include="..\timidity\timer2.c" />
     <ClCompile Include="..\timidity\version.c" />
     <ClCompile Include="..\timidity\voice_effect.c" />
+    <ClCompile Include="..\timidity\w32g_vorbisfile_dll.c" />
+    <ClCompile Include="..\timidity\w32g_vorbis_dll.c" />
     <ClCompile Include="..\timidity\w32_a.c" />
     <ClCompile Include="..\timidity\wave_a.c" />
     <ClCompile Include="..\timidity\wrdt.c" />
index 73e1223..d32a77f 100644 (file)
@@ -167,7 +167,7 @@ void ExportFile(HWND hDlg, bool bExportList)
 
        if (bExportList) {
                fd.setDefaultExt("txt");
-               fd.setFilter("soundfont (*.sf2;*.sf3)\0*.sf2;*.sf3\0All files (*.*)\0*.*\0\0");
+               fd.setFilter("Soundfont Preset List (*.txt)\0*.txt\0\0");
        } else {
                fd.setDefaultExt("cfg");
                fd.setFilter("TiMidity++ Config File (*.cfg)\0*.cfg\0\0");
@@ -247,7 +247,7 @@ LRESULT DlgMainProc_COMMAND(HWND hDlg, WPARAM wParam, LPARAM lParam)
                CMyFileDialog fd;
                fd.setOpenDlgDefaultSetting();
                fd.setTitle("open soundfont");
-               fd.setFilter("soundfont (*.sf2)\0*.sf2\0\0");
+               fd.setFilter("soundfont (*.sf2;*.sf3)\0*.sf2;*.sf3\0All files (*.*)\0*.*\0\0");
                fd.setOwner(hDlg);
                if (fd.Execute()) {
                        const int n = fd.getIndex();
index df77902..391fea7 100644 (file)
                                >
                        </File>
                        <File
+                               RelativePath="..\timidity\w32g_vorbis_dll.c"
+                               >
+                       </File>
+                       <File
+                               RelativePath="..\timidity\w32g_vorbisfile_dll.c"
+                               >
+                       </File>
+                       <File
                                RelativePath="..\interface\winsyn_c.c"
                                >
                        </File>
                                        >
                                </File>
                                <File
+                                       RelativePath="..\timidity\decode.c"
+                                       >
+                               </File>
+                               <File
                                        RelativePath="..\interface\dumb_c.c"
                                        >
                                </File>
index 4b592d7..8707202 100644 (file)
     <ClCompile Include="..\timidity\audio_cnv.c" />
     <ClCompile Include="..\timidity\common.c" />
     <ClCompile Include="..\timidity\controls.c" />
+    <ClCompile Include="..\timidity\decode.c" />
     <ClCompile Include="..\timidity\effect.c" />
     <ClCompile Include="..\timidity\envelope.c" />
     <ClCompile Include="..\timidity\filter.c" />
     <ClCompile Include="..\timidity\timer2.c" />
     <ClCompile Include="..\timidity\version.c" />
     <ClCompile Include="..\timidity\voice_effect.c" />
+    <ClCompile Include="..\timidity\w32g_vorbisfile_dll.c" />
+    <ClCompile Include="..\timidity\w32g_vorbis_dll.c" />
     <ClCompile Include="..\timidity\w32_a.c" />
     <ClCompile Include="..\timidity\w32_portaudio_dll.c" />
     <ClCompile Include="..\timidity\wasapi_a.c" />
index 2729be0..6768915 100644 (file)
@@ -21,6 +21,7 @@
 #include "timidity.h"
 #include "common.h"
 #include "controls.h"
+#include "instrum.h"
 #include "decode.h"
 
 #ifdef HAVE_LIBVORBIS
@@ -33,6 +34,8 @@ extern int load_vorbis_dll(void);     // w32g_vorbis_dll.c
 extern int load_vorbisfile_dll(void);  // w32g_vorbisfile_dll.c
 #endif
 
+static sample_t DummySampleData[128];
+
 static size_t oggvorbis_read_callback(void *ptr, size_t size, size_t nmemb, void *datasource)
 {
     struct timidity_file *tf = (struct timidity_file *)datasource;
@@ -61,6 +64,9 @@ SampleDecodeResult decode_oggvorbis(struct timidity_file *tf)
        ptr_size_t data_length;
        ptr_size_t current_size;
 
+       sdr.data = DummySampleData;
+       sdr.data_type = SAMPLE_TYPE_INT16;
+
     if (load_vorbis_dll() != 0) {
         ctl->cmsg(CMSG_ERROR, VERB_NORMAL, "unable to load vorbis dll");
         return sdr;
@@ -100,6 +106,7 @@ SampleDecodeResult decode_oggvorbis(struct timidity_file *tf)
        data_length = (data_length > 0 ? data_length : 4096);
        current_size = 0;
     sdr.data = (sample_t *)safe_large_malloc(data_length);
+       sdr.data_alloced = 1;
 
     while (1) {
         int bitstream = 0;
@@ -128,12 +135,14 @@ SampleDecodeResult decode_oggvorbis(struct timidity_file *tf)
 
 cleanup:
     ov_clear(&vf);
-
-    if (sdr.data) {
+       
+    if (sdr.data_alloced) {
         safe_free(sdr.data);
-        sdr.data = NULL;
     }
 
+    sdr.data = DummySampleData;
+       sdr.data_alloced = 0;
+
     return sdr;
 }
 
@@ -141,8 +150,13 @@ cleanup:
 
 SampleDecodeResult decode_oggvorbis(struct timidity_file *tf)
 {
+    SampleDecodeResult sdr = {0}; // stub result: all fields zeroed, so data_alloced stays 0
+
+       sdr.data = DummySampleData; // hand back the static placeholder buffer rather than NULL
+       sdr.data_type = SAMPLE_TYPE_INT16; // same type tag the real decoder assigns before it starts
     ctl->cmsg(CMSG_ERROR, VERB_NORMAL, "ogg vorbis decoder support is disabled");
-    return (SampleDecodeResult){.data = NULL, .data_type = 0};
+
+    return sdr; // data_length and channels are left at 0
 }
 
 #endif // HAVE_LIBVORBIS
index a1f926c..b0e370f 100644 (file)
@@ -3,6 +3,8 @@
 
 typedef struct SampleDecodeResult {
     sample_t *data;
+       uint8 data_alloced; // 1 once decode_oggvorbis has malloc'ed data; its cleanup path frees data only when this is set
+       int data_type; // sample format tag for data (decode.c stores SAMPLE_TYPE_INT16 here)
     splen_t data_length;
     int channels;
 } SampleDecodeResult;
index 5b5e6b1..64f53f0 100644 (file)
@@ -716,6 +716,11 @@ static void init_vst_effect(void)
 
 //static DATA_T efx_buffer[AUDIO_BUFFER_SIZE * 2]; // tmp buffer
 
+static inline FLOAT_T gain_FLOAT_T(FLOAT_T val) // returns 10^(val * DIV_20); presumably dB -> linear gain, assuming DIV_20 == 1/20 (TODO confirm)
+{
+       return pow((FLOAT_T)10.0, val * DIV_20);
+}
+
 /*! panning (pan = [0, 127]) */
 #if !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT) /* fixed-point implementation */
 static inline int32 do_left_panning(int32 sample, int32 pan)
@@ -808,13 +813,16 @@ static double calc_pan_xg_right(int val)
                return (double)(clip_int(val, 1, 127) - 0x40) * DIV_63 + 0.5;
 }
 
-///r
 static double calc_xg_level(int val, struct effect_xg_t *st)
 {
        if(val == 0)
                return 0.0;
-//     return (FLOAT_T)val * DIV_127;
-       return pow((FLOAT_T)val * DIV_127, 0.5); 
+#if 1 // c219: special case for the first variation effect slot
+       if(st == &variation_effect_xg[0] && st->connection == XG_CONN_INSERTION) // var ins mode
+               return (FLOAT_T)val * DIV_127 * 2.0; // twice the normal linear level in this mode
+#endif
+       return (FLOAT_T)val * DIV_127; // level is now linear in val; the old pow(..., 0.5) curve is kept below for reference
+//     return pow((FLOAT_T)val * DIV_127, 0.5); 
 }
 
 static double calc_dry_xg(int val, struct effect_xg_t *st)
@@ -826,14 +834,12 @@ static double calc_dry_xg(int val, struct effect_xg_t *st)
                return 0.0;
        case XG_CONN_INSERTION:
        default:
-               if(val == 0)
-                       return 1.0;
-               else if(val == 127)
+               if(val == 127)
                        return 0.0;
-               else if(val == 64)
-                       return 0.5;
+               else if(val <= 64)
+                       return 1.0;
                else
-                       return 1.0 - ((double)(clip_int(val, 1, 127) - 0x40) * DIV_63 + 0.5);
+                       return (double)(127 - clip_int(val, 0, 127)) * DIV_63;
        }
 }
 
@@ -848,12 +854,10 @@ static double calc_wet_xg(int val, struct effect_xg_t *st)
        default:
                if(val == 0)
                        return 0.0;
-               else if(val == 127)
+               else if(val >= 64)
                        return 1.0;
-               else if(val == 64)
-                       return 0.5;
                else
-                       return (double)(clip_int(val, 1, 127) - 0x40) * DIV_63 + 0.5;
+                       return (double)(clip_int(val, 0, 127) - 0x1) * DIV_63;
        }
 }
 
@@ -883,14 +887,6 @@ static double calc_pan_gs_right(int val)
 
 static double calc_dry_gs(int val)
 {
-       //if(val == 0)
-       //      return 1.0;
-       //else if(val == 127)
-       //      return 0.0;
-       //else if(val == 64)
-       //      return 0.5;
-       //else
-       //      return 1.0 - ((double)(clip_int(val, 1, 127) - 0x40) * DIV_63 + 0.5);
        if(val == 127)
                return 0.0;
        else if(val <= 64)
@@ -901,14 +897,6 @@ static double calc_dry_gs(int val)
 
 static double calc_wet_gs(int val)
 {
-       //if(val == 0)
-       //      return 0.0;
-       //else if(val == 127)
-       //      return 1.0;
-       //else if(val == 64)
-       //      return 0.5;
-       //else
-       //      return (double)(clip_int(val, 1, 127) - 0x40) * DIV_63 + 0.5;
        if(val == 0)
                return 0.0;
        else if(val >= 64)
@@ -2302,14 +2290,15 @@ static void init_drive(Drive *drv, int type, double curve, double clip, double d
        int i;
        FLOAT_T clip_level, div_amp_level;
        FLOAT_T v1, v2, out;
-       const FLOAT_T div_size = DRIVE_SCALE_MAX / (FLOAT_T)DRIVE_TABLE_LENGTH;
+       FLOAT_T div_size = DRIVE_SCALE_MAX / (FLOAT_T)DRIVE_TABLE_LENGTH;
 
        if(clip < 0.01) clip = 0.01;
-       clip_level = ((FLOAT_T)WS_AMP_VALUE * 0.33) * clip;
-       div_amp_level = 3.0 / ((FLOAT_T)WS_AMP_VALUE * 0.33) * drive;
+       div_size /= clip;
+       clip_level = ((FLOAT_T)WS_AMP_VALUE * 0.5) * clip;
+       div_amp_level = 1.0 / ((FLOAT_T)WS_AMP_VALUE * 0.5) * drive;
        drv->cnv = (FLOAT_T)DRIVE_BASE_LENGTH * div_amp_level;
        drv->cnvi = TIM_FSCALE((FLOAT_T)(1 << DRIVE_INPUT_BIT) * div_amp_level, 24);
-
+       
        switch(type){
        default:
        case 0: // linear <-> clipping
@@ -2355,19 +2344,19 @@ static void init_drive(Drive *drv, int type, double curve, double clip, double d
                break;
        case 4: // sq/sqrt <-> clipping <-> tanh
                for(i = 0; i < (DRIVE_TABLE_LENGTH + 1); i++){
+                       double curve2;
                        v1 = div_size * i;
                        v2 = v1;
                        if(curve < 0.5){        
                                v1 = (v1 > 1.0) ? sqrt(v1) : sq(v1);                    
                                if(v2 > 1.0) v2 = 1.0;
-                               curve *= 2.0;
+                               curve2 = curve *2.0;
                        }else{
                                if(v1 > 1.0) v1 = 1.0;
                                v2 = tanh(v2 * M_PI);
-                               curve *= 2.0;
-                               curve -= 1.0;
+                               curve2 = (curve - 0.5) * 2.0;
                        }
-                       out = v1 + (v2 - v1) * curve;
+                       out = v1 + (v2 - v1) * curve2;
                        out *= clip_level;
                        drv->dc[i] = out;
                }
@@ -2377,13 +2366,9 @@ static void init_drive(Drive *drv, int type, double curve, double clip, double d
                        v1 = div_size * i;
                        v2 = v1;
                        if(v1 > 1.0) v1 = 1.0;
-                       v2 *= 4;
                        v2 -= floor(v2);
-                       v2 *= DIV_4;
+                       v2 *= 2.0;
                        if(v2 > 1.0) v2 = 2.0 - v2;
-                       if(v2 < 1.0) v2 = v2;
-                       else if(v1 < 3.0) v2 = 2.0 - v2;
-                       else if(v1 < 5.0) v2 = -2.0 + v2;
                        out = v1 + (v2 - v1) * curve;
                        out *= clip_level;
                        drv->dc[i] = out;
@@ -2405,13 +2390,9 @@ static void init_drive(Drive *drv, int type, double curve, double clip, double d
                        v1 = div_size * i;
                        v2 = v1;
                        v1 = tanh(v1 * M_PI);
-                       v2 *= 4;
                        v2 -= floor(v2);
-                       v2 *= DIV_4;
+                       v2 *= 2.0;
                        if(v2 > 1.0) v2 = 2.0 - v2;
-                       if(v2 < 1.0) v2 = v2;
-                       else if(v1 < 3.0) v2 = 2.0 - v2;
-                       else if(v1 < 5.0) v2 = -2.0 + v2;
                        out = v1 + (v2 - v1) * curve;
                        out *= clip_level;
                        drv->dc[i] = out;
@@ -2515,10 +2496,24 @@ static void do_drive_mono(Drive *drv, DATA_T *inout)
 {
        int32 index;
        FLOAT_T in, sign, v1, v2, fp;
-       
+
+       static FLOAT_T max = 0, avg = 0, sum = 0;
+       static int32 tc = 0;
+
+
+
+
        in = *inout;
        sign = (in < 0) ? (-1.0) : (1.0);
        in *= drv->cnv * sign;
+       
+       if(in > 1 && in > max)
+               max = in;
+       ++tc;
+       sum += in;
+       avg = sum / (FLOAT_T)tc;
+
+
        fp = floor(in);
        index = fp;
        fp = in - fp;
@@ -3552,7 +3547,7 @@ static void init_freeverb(InfoFreeverb *info)
        default:
                info->do_reverb_mode = do_freeverb_none;
                break;
-       }
+       }       
        if(error) info->do_reverb_mode = do_freeverb_none; // safe
        info->init = 1;
 }
@@ -4701,10 +4696,13 @@ double ext_reverb_ex_ap_rate = 0.1; // 0.15Hz
 double ext_reverb_ex_ap_depth = 0.5; // 1.5ms
 
 static void do_reverb_ex_none(DATA_T *buf, int32 count, InfoReverbEX *info);
-static void do_reverb_ex_chST(DATA_T *buf, int32 count, InfoReverbEX *info);
-static void do_reverb_ex_chMS(DATA_T *buf, int32 count, InfoReverbEX *info);
-static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info);
-static void do_reverb_ex_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info);
+static void do_reverb_ex_chSTMS(DATA_T *buf, int32 count, InfoReverbEX *info);
+
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+static void do_reverb_ex_mod_chSTMS_thread(DATA_T *buf, int32 count, InfoReverbEX *info);
+static void do_reverb_ex_mod_chSTMS_thread1(int thread_num, void *info2);
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+
 
 static void rev_ex_mod_free_ap_delay(InfoReverbEX *info);
 static void init_reverb_ex_mod(InfoReverbEX *info);
@@ -4747,13 +4745,8 @@ static void rev_ex_init_rv_fbc(InfoReverbEX *info, int num, int unit_num)
 {
        int tmp = num + unit_num * 3 / 4; // + :offset
        if(tmp >= unit_num) tmp -= unit_num;
-       if(num & 0x2){ // swap
-               info->rv_in[num][0] = &info->rv_out[tmp][1];
-               info->rv_in[num][1] = &info->rv_out[tmp][0];
-       }else{
-               info->rv_in[num][0] = &info->rv_out[tmp][0];
-               info->rv_in[num][1] = &info->rv_out[tmp][1];
-       }
+       info->rv_in[num][0] = &info->rv_out[tmp][0];
+       info->rv_in[num][1] = &info->rv_out[tmp][1];
        info->rv_out[num][0] = info->rv_out[num][1] = 0;
 }
 
@@ -4829,7 +4822,7 @@ static int rev_ex_init_ap_delay(InfoReverbEX *info, int32 size)
 
 static void free_reverb_ex(InfoReverbEX *info)
 {
-       int i;  
+       int i, k;       
        if(!info)
                return;
        if(!info->init)
@@ -4842,6 +4835,23 @@ static void free_reverb_ex(InfoReverbEX *info)
        }
        rev_ex_free_rv_delay(info);
        rev_ex_free_ap_delay(info);
+
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       if(info->tobuf != NULL){
+               aligned_free(info->tobuf);
+               info->tobuf = NULL;
+       }
+#else
+       if(info->tobuf != NULL){
+               safe_free(info->tobuf);
+               info->tobuf = NULL;
+       }
+#endif
+       reset_effect_sub_thread(do_reverb_ex_mod_chSTMS_thread1, info);
+       info->thread = 0;
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+
        info->init = 0;
 }
 
@@ -4885,7 +4895,13 @@ static void init_reverb_ex(InfoReverbEX *info)
        num_div2 = num / 2;
        div_num = 1.0 / (FLOAT_T)num;
        norm = pow(div_num, DIV_3_2);
-       ext_reverb_ex_ap_num = ext_reverb_ex_ap_num ? REV_EX_AP_MAX : 0;
+       if(ext_reverb_ex_ap_num >= REV_EX_AP_MAX)
+               ext_reverb_ex_ap_num = REV_EX_AP_MAX;
+       else if(ext_reverb_ex_ap_num >= 1)
+               ext_reverb_ex_ap_num = 4;
+       else 
+               ext_reverb_ex_ap_num = 0;
+       //ext_reverb_ex_ap_num = ext_reverb_ex_ap_num ? REV_EX_AP_MAX : 0;
        // init
        init_prime_list();
        pdelay_cnt = info->er_time_ms * playmode_rate_ms;       
@@ -4973,7 +4989,7 @@ static void init_reverb_ex(InfoReverbEX *info)
                }
                info->index2[REV_EX_AP1] = 0;
                info->index2[REV_EX_AP2] = 0;
-               ++size;
+               size++;
                if(size < 2) size = 2;
                info->size2[REV_EX_AP1] = size;
                info->size2[REV_EX_AP2] = size;
@@ -4984,7 +5000,7 @@ static void init_reverb_ex(InfoReverbEX *info)
        set_sample_filter_type(&info->hpf, FILTER_NONE);
        init_sample_filter(&info->hpf, REV_EX_HPF_FREQ, 0, FILTER_HPF_BW);      
        info->unit_num = num;
-       info->st_sprd = div_num; // L+,R-
+       info->st_sprd = (info->mode == CH_STEREO) ? div_num : 0.0; // L+,R-
        info->flt_wet = (info->rev_damp_bal + 1.0) * DIV_2;
        info->flt_dry = 1.0 - info->flt_wet;
        info->feedback = info->rev_feedback * REV_EX_FEEDBACK;
@@ -5027,15 +5043,19 @@ static void init_reverb_ex(InfoReverbEX *info)
        // func
        switch(info->mode){
        case CH_STEREO:
-               info->do_reverb_mode = ext_reverb_ex_ap_num ? do_reverb_ex_chST_ap8 : do_reverb_ex_chST;
-               break;
        case CH_MIX_STEREO:
-               info->do_reverb_mode = ext_reverb_ex_ap_num ? do_reverb_ex_chMS_ap8 : do_reverb_ex_chMS;
+               info->do_reverb_mode = do_reverb_ex_chSTMS;
                break;
        default:
                info->do_reverb_mode = do_reverb_ex_none;
                break;
-       }
+       }       
+
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)        
+       reset_effect_sub_thread(do_reverb_ex_mod_chSTMS_thread1, info);
+       info->thread = 0;
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+
        if(error) info->do_reverb_mode = do_reverb_ex_none; // safe
        info->init = 1;
 }
@@ -5047,168 +5067,8 @@ static void do_reverb_ex_none(DATA_T *buf, int32 count, InfoReverbEX *info)
        memset(buf, 0, sizeof(DATA_T) * count); // count > 0 // wet 0 
 }
 
-
 #if (OPT_MODE == 1) && !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT) /* fixed-point implementation */
-static void do_reverb_ex_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       int32 leveler = info->leveleri, levelrv = info->levelrvi, feedback = info->feedbacki,
-               *rv_feedback = info->rv_feedbacki, flt_dry = info->flt_dryi, flt_wet = info->flt_weti,
-               *dcH = hpf->dc, *dcL = er_fc->dc, in_level = info->in_leveli, st_sprd = info->st_sprdi;
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[8], *dbL = er_fc->db; 
-       DATA_T *hist = info->hist, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2], sprd;
-       // CH_STEREO:
-       for (k = 0; k < count; k++)
-       {               
-               input[0] = buf[k]; input[1] = buf[k + 1];
-               sprd = imuldiv24((input[0] - input[1]), st_sprd);
-               input[0] = input[1] = imuldiv24((input[0] + input[1]), in_level);
-               input[0] += imuldiv24(hist[0], feedback); input[1] += imuldiv24(hist[1], feedback);
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               // unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               for (i = 0; i < info->unit_num; i++) {
-                       // index inc
-                       DATA_T *buf[4];
-                       int32 *index[4] = {&info->index[i][0], &info->index[i][1], &info->index[i][2], &info->index[i][3],};
-                       if (++(*index[0]) >= info->size[i][0]) {*index[0] = 0;}
-                       if (++(*index[1]) >= info->size[i][1]) {*index[1] = 0;}
-                       if (++(*index[2]) >= info->size[i][2]) {*index[2] = 0;}
-                       if (++(*index[3]) >= info->size[i][3]) {*index[3] = 0;}
-                       buf[0] = &info->buf[i][0][*index[0]];
-                       buf[1] = &info->buf[i][1][*index[1]];
-                       buf[2] = &info->buf[i][2][*index[2]];
-                       buf[3] = &info->buf[i][3][*index[3]];
-                       // er out
-                       dat_er[0] += *buf[0]; dat_er[1] += *buf[1];
-                       // er in
-                       *buf[0] = input[0]; *buf[1] = input[1];
-                       input[0] += sprd; input[1] -= sprd; // spread
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = *buf[2];
-                       tmp1[1] = *buf[3];
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = imuldiv28(tmp1[0], lpf->dc[0]) + imuldiv28(lpf->db[0], lpf->dc[1]);
-                       lpf->db[1] = imuldiv28(tmp1[1], lpf->dc[0]) + imuldiv28(lpf->db[1], lpf->dc[1]);
-                       dat_rv[0] += (info->rv_out[i][0] = imuldiv24(tmp1[0], flt_dry) + imuldiv24(lpf->db[0], flt_wet));
-                       dat_rv[1] += (info->rv_out[i][1] = imuldiv24(tmp1[1], flt_dry) + imuldiv24(lpf->db[1], flt_wet));
-                       // rv in
-                       *buf[2] = input_rv[0] + imuldiv24(tmp_rv[0], rv_feedback[i]);
-                       *buf[3] = input_rv[1] + imuldiv24(tmp_rv[1], rv_feedback[i]);
-               }
-               // er flt
-               dbL[0] = imuldiv28(dat_er[0], dcL[0]) + imuldiv28(dbL[0], dcL[1]);
-               dbL[1] = imuldiv28(dat_er[1], dcL[0]) + imuldiv28(dbL[1], dcL[1]);
-               dat_er[0] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[0], flt_wet);
-               dat_er[1] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[1], flt_wet);
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // out
-               hist[0] = imuldiv16(dat_rv[0], levelrv) - imuldiv16(dat_er[0], leveler);
-               hist[1] = imuldiv16(dat_rv[1], levelrv) - imuldiv16(dat_er[1], leveler);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = imuldiv28(dbHL[0], dcH[0]) + imuldiv28(dbHL[1], dcH[1]) + imuldiv28(dbHL[2], dcH[2])
-                       - imuldiv28(dbHL[3], dcH[3]) - imuldiv28(dbHL[4], dcH[4]);
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = imuldiv28(dbHR[0], dcH[0]) + imuldiv28(dbHR[1], dcH[1]) + imuldiv28(dbHR[2], dcH[2])
-                       - imuldiv28(dbHR[3], dcH[3]) - imuldiv28(dbHR[4], dcH[4]);
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];              
-               buf[k] = hist[0]; buf[++k] = hist[1];
-       }
-}
-
-static void do_reverb_ex_chMS(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       int32 leveler = info->leveleri, levelrv = info->levelrvi, feedback = info->feedbacki,
-               *rv_feedback = info->rv_feedbacki, flt_dry = info->flt_dryi, flt_wet = info->flt_weti,
-               *dcH = hpf->dc, *dcL = er_fc->dc, in_level = info->in_leveli;
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  *hist = info->hist, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2];
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k++)
-       {               
-               input[0] = input[1] = imuldiv24((buf[k] + buf[k + 1]), info->in_level);
-               input[0] += imuldiv24(hist[0], feedback); input[1] += imuldiv24(hist[1], feedback);
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               // unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               for (i = 0; i < info->unit_num; i++) {
-                       // index inc
-                       DATA_T *buf[4];
-                       int32 *index[4] = {&info->index[i][0], &info->index[i][1], &info->index[i][2], &info->index[i][3],};
-                       if (++(*index[0]) >= info->size[i][0]) {*index[0] = 0;}
-                       if (++(*index[1]) >= info->size[i][1]) {*index[1] = 0;}
-                       if (++(*index[2]) >= info->size[i][2]) {*index[2] = 0;}
-                       if (++(*index[3]) >= info->size[i][3]) {*index[3] = 0;}
-                       buf[0] = &info->buf[i][0][*index[0]];
-                       buf[1] = &info->buf[i][1][*index[1]];
-                       buf[2] = &info->buf[i][2][*index[2]];
-                       buf[3] = &info->buf[i][3][*index[3]];
-                       // er out
-                       dat_er[0] += *buf[0]; dat_er[1] += *buf[1];
-                       // er in
-                       *buf[0] = input[0]; *buf[1] = input[1];
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = *buf[2];
-                       tmp1[1] = *buf[3];
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = imuldiv28(tmp1[0], lpf->dc[0]) + imuldiv28(lpf->db[0], lpf->dc[1]);
-                       lpf->db[1] = imuldiv28(tmp1[1], lpf->dc[0]) + imuldiv28(lpf->db[1], lpf->dc[1]);
-                       dat_rv[0] += (info->rv_out[i][0] = imuldiv24(tmp1[0], flt_dry) + imuldiv24(lpf->db[0], flt_wet));
-                       dat_rv[1] += (info->rv_out[i][1] = imuldiv24(tmp1[1], flt_dry) + imuldiv24(lpf->db[1], flt_wet));
-                       // rv in
-                       *buf[2] = input_rv[0] + imuldiv24(tmp_rv[0], rv_feedback[i]);
-                       *buf[3] = input_rv[1] + imuldiv24(tmp_rv[1], rv_feedback[i]);
-               }
-               // er flt
-               dbL[0] = imuldiv28(dat_er[0], dcL[0]) + imuldiv28(dbL[0], dcL[1]);
-               dbL[1] = imuldiv28(dat_er[1], dcL[0]) + imuldiv28(dbL[1], dcL[1]);
-               dat_er[0] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[0], flt_wet);
-               dat_er[1] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[1], flt_wet);
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // out
-               hist[0] = imuldiv16(dat_rv[0], levelrv) - imuldiv16(dat_er[0], leveler);
-               hist[1] = imuldiv16(dat_rv[1], levelrv) - imuldiv16(dat_er[1], leveler);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = imuldiv28(dbHL[0], dcH[0]) + imuldiv28(dbHL[1], dcH[1]) + imuldiv28(dbHL[2], dcH[2])
-                       - imuldiv28(dbHL[3], dcH[3]) - imuldiv28(dbHL[4], dcH[4]);
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = imuldiv28(dbHR[0], dcH[0]) + imuldiv28(dbHR[1], dcH[1]) + imuldiv28(dbHR[2], dcH[2])
-                       - imuldiv28(dbHR[3], dcH[3]) - imuldiv28(dbHR[4], dcH[4]);
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];              
-               buf[k] = hist[0]; buf[++k] = hist[1];
-       }
-}
-
-static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
+static void do_reverb_ex_chSTMS(DATA_T *buf, int32 count, InfoReverbEX *info)
 {
        int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
        FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
@@ -5273,12 +5133,13 @@ static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
                // rv delay in
                bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
                // ap
+               if(ext_reverb_ex_ap_num){
                if ((++(*indexap)) >= sizeap) {*indexap -= sizeap;}
                info->abuf[0][*indexap] = imuldiv24(dat_er[0], levelap) + imuldiv24(fb_ap1[0], apfbi); 
                info->abuf[1][*indexap] = imuldiv24(dat_er[1], levelap) + imuldiv24(fb_ap1[1], apfbi); 
                info->abuf[2][*indexap] = imuldiv24(dat_rv[0], levelap) + imuldiv24(fb_ap2[0], apfbi); 
                info->abuf[3][*indexap] = imuldiv24(dat_rv[1], levelap) + imuldiv24(fb_ap2[1], apfbi);  
-               for (i = 0; i < REV_EX_AP_MAX; i++) {
+               for (i = 0; i < ext_reverb_ex_ap_num; i++) {
                        int32 index;
                        // ap1 er
                        if((index = *indexap - info->delaya[i][0]) < 0) {index += sizeap;} 
@@ -5293,112 +5154,10 @@ static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
                }
                fb_ap1[0] = dat_er[0]; fb_ap1[1] = dat_er[1];
                fb_ap2[0] = dat_rv[0]; fb_ap2[1] = dat_rv[1];
-               // out
-               hist[0] = imuldiv16(dat_rv[0], levelrv) - imuldiv16(dat_er[0], leveler);
-               hist[1] = imuldiv16(dat_rv[1], levelrv) - imuldiv16(dat_er[1], leveler);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = imuldiv28(dbHL[0], dcH[0]) + imuldiv28(dbHL[1], dcH[1]) + imuldiv28(dbHL[2], dcH[2])
-                       - imuldiv28(dbHL[3], dcH[3]) - imuldiv28(dbHL[4], dcH[4]);
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = imuldiv28(dbHR[0], dcH[0]) + imuldiv28(dbHR[1], dcH[1]) + imuldiv28(dbHR[2], dcH[2])
-                       - imuldiv28(dbHR[3], dcH[3]) - imuldiv28(dbHR[4], dcH[4]);
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];              
-               buf[k] = hist[0]; buf[++k] = hist[1];
-       }
-}
-
-static void do_reverb_ex_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       int32 leveler = info->leveleri, levelrv = info->levelrvi, feedback = info->feedbacki,
-               *rv_feedback = info->rv_feedbacki, flt_dry = info->flt_dryi, flt_wet = info->flt_weti,
-               *dcH = hpf->dc, *dcL = er_fc->dc, in_level = info->in_leveli;
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  *hist = info->hist, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2];
-       int32 *indexap = &info->index2[REV_EX_AP1], sizeap = info->size2[REV_EX_AP1];
-       DATA_T *fb_ap1 = info->fb_ap1, *fb_ap2 = info->fb_ap2;
-       int32 levelap = info->levelapi;
-       const int32 apfbi = TIM_FSCALE(REV_EX_AP_FB, 24);
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k++)
-       {               
-               input[0] = input[1] = imuldiv24((buf[k] + buf[k + 1]), info->in_level);
-               input[0] += imuldiv24(hist[0], feedback); input[1] += imuldiv24(hist[1], feedback);
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               // unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               for (i = 0; i < info->unit_num; i++) {
-                       // index inc
-                       DATA_T *buf[4];
-                       int32 *index[4] = {&info->index[i][0], &info->index[i][1], &info->index[i][2], &info->index[i][3],};
-                       if (++(*index[0]) >= info->size[i][0]) {*index[0] = 0;}
-                       if (++(*index[1]) >= info->size[i][1]) {*index[1] = 0;}
-                       if (++(*index[2]) >= info->size[i][2]) {*index[2] = 0;}
-                       if (++(*index[3]) >= info->size[i][3]) {*index[3] = 0;}
-                       buf[0] = &info->buf[i][0][*index[0]];
-                       buf[1] = &info->buf[i][1][*index[1]];
-                       buf[2] = &info->buf[i][2][*index[2]];
-                       buf[3] = &info->buf[i][3][*index[3]];
-                       // er out
-                       dat_er[0] += *buf[0]; dat_er[1] += *buf[1];
-                       // er in
-                       *buf[0] = input[0]; *buf[1] = input[1];
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = *buf[2];
-                       tmp1[1] = *buf[3];
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = imuldiv28(tmp1[0], lpf->dc[0]) + imuldiv28(lpf->db[0], lpf->dc[1]);
-                       lpf->db[1] = imuldiv28(tmp1[1], lpf->dc[0]) + imuldiv28(lpf->db[1], lpf->dc[1]);
-                       dat_rv[0] += (info->rv_out[i][0] = imuldiv24(tmp1[0], flt_dry) + imuldiv24(lpf->db[0], flt_wet));
-                       dat_rv[1] += (info->rv_out[i][1] = imuldiv24(tmp1[1], flt_dry) + imuldiv24(lpf->db[1], flt_wet));
-                       // rv in
-                       *buf[2] = input_rv[0] + imuldiv24(tmp_rv[0], rv_feedback[i]);
-                       *buf[3] = input_rv[1] + imuldiv24(tmp_rv[1], rv_feedback[i]);
-               }
-               // er flt
-               dbL[0] = imuldiv28(dat_er[0], dcL[0]) + imuldiv28(dbL[0], dcL[1]);
-               dbL[1] = imuldiv28(dat_er[1], dcL[0]) + imuldiv28(dbL[1], dcL[1]);
-               dat_er[0] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[0], flt_wet);
-               dat_er[1] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[1], flt_wet);
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // ap
-               if ((++(*indexap)) >= sizeap) {*indexap -= sizeap;}
-               info->abuf[0][*indexap] = imuldiv24(dat_er[0], levelap) + imuldiv24(fb_ap1[0], apfbi); 
-               info->abuf[1][*indexap] = imuldiv24(dat_er[1], levelap) + imuldiv24(fb_ap1[1], apfbi); 
-               info->abuf[2][*indexap] = imuldiv24(dat_rv[0], levelap) + imuldiv24(fb_ap2[0], apfbi); 
-               info->abuf[3][*indexap] = imuldiv24(dat_rv[1], levelap) + imuldiv24(fb_ap2[1], apfbi);  
-               for (i = 0; i < REV_EX_AP_MAX; i++) {
-                       int32 index;
-                       // ap1 er
-                       if((index = *indexap - info->delaya[i][0]) < 0) {index += sizeap;} 
-                       dat_er[0] += info->abuf[0][index]; 
-                       if((index = *indexap - info->delaya[i][1]) < 0) {index += sizeap;}
-                       dat_er[1] += info->abuf[1][index];      
-                       // ap2 rv
-                       if((index = *indexap - info->delaya[i][2]) < 0) {index += sizeap;}
-                       dat_rv[0] += info->abuf[2][index]; 
-                       if((index = *indexap - info->delaya[i][3]) < 0) {index += sizeap;}
-                       dat_rv[1] += info->abuf[3][index]; 
                }
-               fb_ap1[0] = dat_er[0]; fb_ap1[1] = dat_er[1];
-               fb_ap2[0] = dat_rv[0]; fb_ap2[1] = dat_rv[1];
                // out
-               hist[0] = imuldiv16(dat_rv[0], levelrv) - imuldiv16(dat_er[0], leveler);
-               hist[1] = imuldiv16(dat_rv[1], levelrv) - imuldiv16(dat_er[1], leveler);
+               hist[0] = imuldiv16(dat_rv[0], levelrv) + imuldiv16(dat_er[0], leveler);
+               hist[1] = imuldiv16(dat_rv[1], levelrv) + imuldiv16(dat_er[1], leveler);
                dbHL[0] = hist[0];      
                hist[0] = dbHL[2] = imuldiv28(dbHL[0], dcH[0]) + imuldiv28(dbHL[1], dcH[1]) + imuldiv28(dbHL[2], dcH[2])
                        - imuldiv28(dbHL[3], dcH[3]) - imuldiv28(dbHL[4], dcH[4]);
@@ -5422,163 +5181,7 @@ static void do_reverb_ex_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
 SSE2 128bitSIMD : double*2ch, int32*4ch
 x64AVX\82Å\96â\91è\81E\81E?
 */
-static void do_reverb_ex_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0;
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       DATA_T *bufrd = info->buf2[REV_EX_RD];
-       __m128d vec_dcH0 = MM_LOAD1_PD(&hpf->dc[0]), vec_dcH1 = MM_LOAD1_PD(&hpf->dc[1]), vec_dcH2 = MM_LOAD1_PD(&hpf->dc[2]), 
-               vec_dcH3 = MM_LOAD1_PD(&hpf->dc[3]), vec_dcH4 = MM_LOAD1_PD(&hpf->dc[4]),
-               vec_dcL0 = MM_LOAD1_PD(&er_fc->dc[0]), vec_dcL1 = MM_LOAD1_PD(&er_fc->dc[1]);
-       __m128d vec_dbH0 = _mm_loadu_pd(&hpf->db[0]), vec_dbH1 = _mm_loadu_pd(&hpf->db[2]), vec_dbH2 = _mm_loadu_pd(&hpf->db[4]), 
-               vec_dbH3 = _mm_loadu_pd(&hpf->db[6]), vec_dbH4 = _mm_loadu_pd(&hpf->db[8]), vec_dbL = _mm_loadu_pd(er_fc->db);
-       __m128d vec_tmp1, vec_input_er, vec_input_rv, vec_mixer, vec_mixap, vec_mixrv, vec_tmp_rv, vec_db, vec_sprd,
-               vec_leveler = MM_LOAD1_PD(&info->leveler), vec_levelrv = MM_LOAD1_PD(&info->levelrv),
-               vec_dry = MM_LOAD1_PD(&info->flt_dry), vec_wet = MM_LOAD1_PD(&info->flt_wet),
-               vec_feedback = MM_LOAD1_PD(&info->feedback), vec_hist = _mm_loadu_pd(info->hist);
-       __m128d vec_sp_sprd = _mm_set_pd(-info->st_sprd, info->st_sprd);
-       __m128i index2 = _mm_loadu_si128((__m128i *)info->index2), size2 = _mm_loadu_si128((__m128i *)info->size2);
-       __m128i add_idx2 = _mm_set_epi32(1, 1, 2, 1);
-       // CH_STEREO:
-       for (k = 0; k < count; k += 2)
-       {               
-               int32 tmpi0;
-               vec_sprd = _mm_mul_pd(_mm_set1_pd(buf[k] - buf[k + 1]), vec_sp_sprd);
-               vec_input_er = _mm_add_pd(_mm_set1_pd((buf[k] + buf[k + 1]) * DIV_MIX_LEVEL), _mm_mul_pd(vec_hist, vec_feedback));
-               // index2 (rv delay, ap
-               index2 = _mm_add_epi32(index2, add_idx2);
-               index2 = _mm_and_si128(index2, _mm_cmplt_epi32(index2, size2));
-               tmpi0 = MM_EXTRACT_EPI32(index2, REV_EX_RD);
-               // rv delay out
-               vec_input_rv = _mm_load_pd(&bufrd[tmpi0]); // REV_EX_RD
-               // unit
-               vec_mixer = _mm_setzero_pd(); vec_mixrv = _mm_setzero_pd();
-               for (i = 0; i < info->unit_num; i++) {
-               ALIGN int32 tmpi[4];
-               // index inc
-               __m128i vec_index = _mm_loadu_si128((__m128i *)&info->index[i][0]); 
-               __m128i vec_size = _mm_loadu_si128((__m128i *)&info->size[i][0]);
-               vec_index = _mm_add_epi32(vec_index, _mm_set_epi32(1, 1, 1, 1));
-               vec_index = _mm_and_si128(vec_index, _mm_cmplt_epi32(vec_index, vec_size));
-               _mm_storeu_si128((__m128i *)&info->index[i][0], vec_index);
-               // er
-               _mm_store_si128((__m128i *)&tmpi, vec_index);
-               vec_mixer = _mm_add_pd(vec_mixer, _mm_set_pd(info->buf[i][1][tmpi[1]], info->buf[i][0][tmpi[0]]));
-               _mm_store_sd(&(info->buf[i][0][tmpi[0]]), vec_input_er); 
-               _mm_store_sd(&(info->buf[i][1][tmpi[1]]), _mm_shuffle_pd(vec_input_er, vec_input_er, 0x1));
-               vec_input_er = _mm_add_pd(vec_input_er, vec_sprd); // spread
-               // rv save
-               vec_tmp_rv = _mm_set_pd(*info->rv_in[i][1], *info->rv_in[i][0]);
-               // rv out
-               vec_tmp1 = _mm_set_pd(info->buf[i][3][tmpi[3]], info->buf[i][2][tmpi[2]]);
-               lpf = &rv_fc[i];
-               vec_db = _mm_loadu_pd(lpf->db);
-               vec_db = MM_FMA2_PD(MM_LOAD1_PD(&lpf->dc[0]), vec_tmp1, MM_LOAD1_PD(&lpf->dc[1]), vec_db);
-               _mm_storeu_pd(lpf->db, vec_db);
-               vec_tmp1 = MM_FMA2_PD(vec_tmp1, vec_dry, vec_db, vec_wet);
-               vec_mixrv = _mm_add_pd(vec_mixrv, vec_tmp1);
-               _mm_storeu_pd(info->rv_out[i], vec_tmp1);
-               // rv in
-               vec_tmp1 = _mm_add_pd(vec_input_rv, _mm_mul_pd(vec_tmp_rv, MM_LOAD1_PD(&info->rv_feedback[i])));
-               _mm_store_sd(&(info->buf[i][2][tmpi[2]]), vec_tmp1);
-               _mm_store_sd(&(info->buf[i][3][tmpi[3]]), _mm_shuffle_pd(vec_tmp1, vec_tmp1, 0x1));
-               }
-               // er flt
-               vec_dbL = MM_FMA2_PD(vec_dcL0, vec_mixer, vec_dcL1, vec_dbL);
-               vec_mixer = MM_FMA2_PD(vec_mixer, vec_dry, vec_dbL, vec_wet);   
-               // rv delay in
-               _mm_store_pd(&bufrd[tmpi0], vec_mixer); 
-               // out
-               vec_hist = _mm_sub_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
-               vec_dbH0 = vec_hist;
-               vec_hist = vec_dbH2 = MM_FMA5_PD(vec_dcH0, vec_dbH0, vec_dcH1, vec_dbH1, vec_dcH2, vec_dbH2, vec_dcH3, vec_dbH3, vec_dcH4, vec_dbH4);
-               vec_dbH4 = vec_dbH3; vec_dbH3 = vec_dbH2; vec_dbH2 = vec_dbH1; vec_dbH1 = vec_dbH0;
-               _mm_store_pd(&buf[k], vec_hist);
-       }
-       _mm_storeu_pd(info->hist, vec_hist); _mm_storeu_pd(er_fc->db, vec_dbL);
-       _mm_storeu_pd(&hpf->db[0], vec_dbH0); _mm_storeu_pd(&hpf->db[2], vec_dbH1); _mm_storeu_pd(&hpf->db[4], vec_dbH2);
-       _mm_storeu_pd(&hpf->db[6], vec_dbH3); _mm_storeu_pd(&hpf->db[8], vec_dbH4);
-       _mm_storeu_si128((__m128i *)info->index2, index2);
-}
-
-static void do_reverb_ex_chMS(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0;
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       DATA_T *bufrd = info->buf2[REV_EX_RD];
-       __m128d vec_dcH0 = MM_LOAD1_PD(&hpf->dc[0]), vec_dcH1 = MM_LOAD1_PD(&hpf->dc[1]), vec_dcH2 = MM_LOAD1_PD(&hpf->dc[2]), 
-               vec_dcH3 = MM_LOAD1_PD(&hpf->dc[3]), vec_dcH4 = MM_LOAD1_PD(&hpf->dc[4]),
-               vec_dcL0 = MM_LOAD1_PD(&er_fc->dc[0]), vec_dcL1 = MM_LOAD1_PD(&er_fc->dc[1]);
-       __m128d vec_dbH0 = _mm_loadu_pd(&hpf->db[0]), vec_dbH1 = _mm_loadu_pd(&hpf->db[2]), vec_dbH2 = _mm_loadu_pd(&hpf->db[4]), 
-               vec_dbH3 = _mm_loadu_pd(&hpf->db[6]), vec_dbH4 = _mm_loadu_pd(&hpf->db[8]), vec_dbL = _mm_loadu_pd(er_fc->db);
-       __m128d vec_tmp1, vec_input_er, vec_input_rv, vec_mixer, vec_mixap, vec_mixrv, vec_tmp_rv, vec_db, 
-               vec_leveler = MM_LOAD1_PD(&info->leveler), vec_levelrv = MM_LOAD1_PD(&info->levelrv),
-               vec_dry = MM_LOAD1_PD(&info->flt_dry), vec_wet = MM_LOAD1_PD(&info->flt_wet),
-               vec_feedback = MM_LOAD1_PD(&info->feedback), vec_hist = _mm_loadu_pd(info->hist),
-               vec_mix_level = _mm_set1_pd(DIV_MIX_LEVEL);
-       __m128i index2 = _mm_loadu_si128((__m128i *)info->index2), size2 = _mm_loadu_si128((__m128i *)info->size2);
-       __m128i add_idx2 = _mm_set_epi32(1, 1, 2, 1);
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k += 2)
-       {               
-               int32 tmpi0;
-               vec_input_er = _mm_add_pd(_mm_set1_pd((buf[k] + buf[k + 1]) * DIV_MIX_LEVEL), _mm_mul_pd(vec_hist, vec_feedback));
-               // index2 (rv delay, ap
-               index2 = _mm_add_epi32(index2, add_idx2);
-               index2 = _mm_and_si128(index2, _mm_cmplt_epi32(index2, size2));
-               tmpi0 = MM_EXTRACT_EPI32(index2, REV_EX_RD);
-               // rv delay out
-               vec_input_rv = _mm_load_pd(&bufrd[tmpi0]); // REV_EX_RD
-               // unit
-               vec_mixer = _mm_setzero_pd(); vec_mixrv = _mm_setzero_pd();
-               for (i = 0; i < info->unit_num; i++) {
-               ALIGN int32 tmpi[4];
-               // index inc
-               __m128i vec_index = _mm_loadu_si128((__m128i *)&info->index[i][0]); 
-               __m128i vec_size = _mm_loadu_si128((__m128i *)&info->size[i][0]);
-               vec_index = _mm_add_epi32(vec_index, _mm_set_epi32(1, 1, 1, 1));
-               vec_index = _mm_and_si128(vec_index, _mm_cmplt_epi32(vec_index, vec_size));
-               _mm_storeu_si128((__m128i *)&info->index[i][0], vec_index);
-               // er
-               _mm_store_si128((__m128i *)&tmpi, vec_index);
-               vec_mixer = _mm_add_pd(vec_mixer, _mm_set_pd(info->buf[i][1][tmpi[1]], info->buf[i][0][tmpi[0]]));
-               _mm_store_sd(&(info->buf[i][0][tmpi[0]]), vec_input_er); 
-               _mm_store_sd(&(info->buf[i][1][tmpi[1]]), _mm_shuffle_pd(vec_input_er, vec_input_er, 0x1));
-               // rv save
-               vec_tmp_rv = _mm_set_pd(*info->rv_in[i][1], *info->rv_in[i][0]);
-               // rv out
-               vec_tmp1 = _mm_set_pd(info->buf[i][3][tmpi[3]], info->buf[i][2][tmpi[2]]);
-               lpf = &rv_fc[i];
-               vec_db = _mm_loadu_pd(lpf->db);
-               vec_db = MM_FMA2_PD(MM_LOAD1_PD(&lpf->dc[0]), vec_tmp1, MM_LOAD1_PD(&lpf->dc[1]), vec_db);
-               _mm_storeu_pd(lpf->db, vec_db);
-               vec_tmp1 = MM_FMA2_PD(vec_tmp1, vec_dry, vec_db, vec_wet);
-               vec_mixrv = _mm_add_pd(vec_mixrv, vec_tmp1);
-               _mm_storeu_pd(info->rv_out[i], vec_tmp1);
-               // rv in
-               vec_tmp1 = _mm_add_pd(vec_input_rv, _mm_mul_pd(vec_tmp_rv, MM_LOAD1_PD(&info->rv_feedback[i])));
-               _mm_store_sd(&(info->buf[i][2][tmpi[2]]), vec_tmp1);
-               _mm_store_sd(&(info->buf[i][3][tmpi[3]]), _mm_shuffle_pd(vec_tmp1, vec_tmp1, 0x1));
-               }
-               // er flt
-               vec_dbL = MM_FMA2_PD(vec_dcL0, vec_mixer, vec_dcL1, vec_dbL);
-               vec_mixer = MM_FMA2_PD(vec_mixer, vec_dry, vec_dbL, vec_wet);   
-               // rv delay in
-               _mm_store_pd(&bufrd[tmpi0], vec_mixer); 
-               // out
-               vec_hist = _mm_sub_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
-               vec_dbH0 = vec_hist;
-               vec_hist = vec_dbH2 = MM_FMA5_PD(vec_dcH0, vec_dbH0, vec_dcH1, vec_dbH1, vec_dcH2, vec_dbH2, vec_dcH3, vec_dbH3, vec_dcH4, vec_dbH4);
-               vec_dbH4 = vec_dbH3; vec_dbH3 = vec_dbH2; vec_dbH2 = vec_dbH1; vec_dbH1 = vec_dbH0;
-               _mm_store_pd(&buf[k], vec_hist);
-       }
-       _mm_storeu_pd(info->hist, vec_hist); _mm_storeu_pd(er_fc->db, vec_dbL);
-       _mm_storeu_pd(&hpf->db[0], vec_dbH0); _mm_storeu_pd(&hpf->db[2], vec_dbH1); _mm_storeu_pd(&hpf->db[4], vec_dbH2);
-       _mm_storeu_pd(&hpf->db[6], vec_dbH3); _mm_storeu_pd(&hpf->db[8], vec_dbH4);
-       _mm_storeu_si128((__m128i *)info->index2, index2);
-}
-
-static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
+static void do_reverb_ex_chSTMS(DATA_T *buf, int32 count, InfoReverbEX *info)
 {
        int32 i, k = 0;
        FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
@@ -5650,14 +5253,15 @@ static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
                // rv delay in
                _mm_store_pd(&bufrd[tmpi2[REV_EX_RD]], vec_mixer);
                // ap
+               if(ext_reverb_ex_ap_num){
                vec_index2 = _mm_shuffle_epi32(index2, 0xAA);
                vtmp[0] = MM_FMA2_PD(vec_mixer, vec_levelap, vec_fbap1, vec_ap_fb);
                vtmp[1] = MM_FMA2_PD(vec_mixrv, vec_levelap, vec_fbap2, vec_ap_fb);
                _mm_store_sd(&info->abuf[REV_EX_ER_L1][tmpi2[REV_EX_AP1]], vtmp[0]); 
                _mm_store_sd(&info->abuf[REV_EX_ER_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[0], vtmp[0], 0x3)); 
                _mm_store_sd(&info->abuf[REV_EX_RV_L1][tmpi2[REV_EX_AP1]], vtmp[1]); 
-               _mm_store_sd(&info->abuf[REV_EX_RV_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[1], vtmp[0], 0x3)); 
-               for (i = 0; i < REV_EX_AP_MAX; i++) {
+               _mm_store_sd(&info->abuf[REV_EX_RV_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[1], vtmp[1], 0x3)); 
+               for (i = 0; i < ext_reverb_ex_ap_num; i++) {
                ALIGN int32 tmpi[4];
                __m128i vec_index;
                vec_index = _mm_sub_epi32(vec_index2, _mm_loadu_si128((__m128i *)info->delaya[i]));
@@ -5669,111 +5273,9 @@ static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
                vec_mixrv = _mm_add_pd(vec_mixrv, _mm_set_pd(info->abuf[3][tmpi[3]], info->abuf[2][tmpi[2]]));
                }
                vec_fbap1 = vec_mixer; vec_fbap2 = vec_mixrv;
-               // out
-               vec_hist = _mm_sub_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
-               vec_dbH0 = vec_hist;
-               vec_hist = vec_dbH2 = MM_FMA5_PD(vec_dcH0, vec_dbH0, vec_dcH1, vec_dbH1, vec_dcH2, vec_dbH2, vec_dcH3, vec_dbH3, vec_dcH4, vec_dbH4);
-               vec_dbH4 = vec_dbH3; vec_dbH3 = vec_dbH2; vec_dbH2 = vec_dbH1; vec_dbH1 = vec_dbH0;
-               _mm_store_pd(&buf[k], vec_hist);
-       }
-       _mm_storeu_pd(info->hist, vec_hist); _mm_storeu_pd(er_fc->db, vec_dbL);
-       _mm_storeu_pd(&hpf->db[0], vec_dbH0); _mm_storeu_pd(&hpf->db[2], vec_dbH1); _mm_storeu_pd(&hpf->db[4], vec_dbH2);
-       _mm_storeu_pd(&hpf->db[6], vec_dbH3); _mm_storeu_pd(&hpf->db[8], vec_dbH4);
-       _mm_storeu_si128((__m128i *)info->index2, index2);
-       _mm_storeu_pd(info->fb_ap1, vec_fbap1); _mm_storeu_pd(info->fb_ap2, vec_fbap2);
-}
-
-static void do_reverb_ex_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0;
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       DATA_T *bufrd = info->buf2[REV_EX_RD];
-       __m128d vec_dcH0 = MM_LOAD1_PD(&hpf->dc[0]), vec_dcH1 = MM_LOAD1_PD(&hpf->dc[1]), vec_dcH2 = MM_LOAD1_PD(&hpf->dc[2]), 
-               vec_dcH3 = MM_LOAD1_PD(&hpf->dc[3]), vec_dcH4 = MM_LOAD1_PD(&hpf->dc[4]),
-               vec_dcL0 = MM_LOAD1_PD(&er_fc->dc[0]), vec_dcL1 = MM_LOAD1_PD(&er_fc->dc[1]);
-       __m128d vec_dbH0 = _mm_loadu_pd(&hpf->db[0]), vec_dbH1 = _mm_loadu_pd(&hpf->db[2]), vec_dbH2 = _mm_loadu_pd(&hpf->db[4]), 
-               vec_dbH3 = _mm_loadu_pd(&hpf->db[6]), vec_dbH4 = _mm_loadu_pd(&hpf->db[8]), vec_dbL = _mm_loadu_pd(er_fc->db);
-       __m128d vec_tmp1, vec_input_er, vec_input_rv, vec_mixer, vec_mixap, vec_mixrv, vec_tmp_rv, vec_db, 
-               vec_leveler = MM_LOAD1_PD(&info->leveler), vec_levelrv = MM_LOAD1_PD(&info->levelrv),
-               vec_dry = MM_LOAD1_PD(&info->flt_dry), vec_wet = MM_LOAD1_PD(&info->flt_wet),
-               vec_feedback = MM_LOAD1_PD(&info->feedback), vec_hist = _mm_loadu_pd(info->hist),
-               vec_mix_level = _mm_set1_pd(DIV_MIX_LEVEL);
-       __m128i index2 = _mm_loadu_si128((__m128i *)info->index2), size2 = _mm_loadu_si128((__m128i *)info->size2);
-       __m128i add_idx2 = _mm_set_epi32(1, 1, 2, 1);
-       __m128i sizeap = _mm_set1_epi32(info->size2[REV_EX_AP1]);
-       __m128d vec_ap_fb = _mm_set1_pd(REV_EX_AP_FB), vec_levelap = MM_LOAD1_PD(&info->levelap), 
-               vec_fbap1 = _mm_loadu_pd(info->fb_ap1), vec_fbap2 = _mm_loadu_pd(info->fb_ap2);
-       __m128i vec_index2;
-       __m128d vtmp[2];
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k += 2)
-       {               
-               ALIGN int32 tmpi2[4];
-               vec_input_er = _mm_add_pd(_mm_set1_pd((buf[k] + buf[k + 1]) * DIV_MIX_LEVEL), _mm_mul_pd(vec_hist, vec_feedback));
-               // index2 (rv delay, ap
-               index2 = _mm_add_epi32(index2, add_idx2);
-               index2 = _mm_and_si128(index2, _mm_cmplt_epi32(index2, size2));
-               _mm_store_si128((__m128i *)&tmpi2, index2);
-               // rv delay out
-               vec_input_rv = _mm_load_pd(&bufrd[tmpi2[REV_EX_RD]]);
-               // unit
-               vec_mixer = _mm_setzero_pd(); vec_mixrv = _mm_setzero_pd();
-               for (i = 0; i < info->unit_num; i++) {
-               ALIGN int32 tmpi[4];
-               // index inc
-               __m128i vec_index = _mm_loadu_si128((__m128i *)&info->index[i][0]); 
-               __m128i vec_size = _mm_loadu_si128((__m128i *)&info->size[i][0]);
-               vec_index = _mm_add_epi32(vec_index, _mm_set_epi32(1, 1, 1, 1));
-               vec_index = _mm_and_si128(vec_index, _mm_cmplt_epi32(vec_index, vec_size));
-               _mm_storeu_si128((__m128i *)&info->index[i][0], vec_index);
-               // er
-               _mm_store_si128((__m128i *)&tmpi, vec_index);
-               vec_mixer = _mm_add_pd(vec_mixer, _mm_set_pd(info->buf[i][1][tmpi[1]], info->buf[i][0][tmpi[0]]));
-               _mm_store_sd(&(info->buf[i][0][tmpi[0]]), vec_input_er); 
-               _mm_store_sd(&(info->buf[i][1][tmpi[1]]), _mm_shuffle_pd(vec_input_er, vec_input_er, 0x1));
-               // rv save
-               vec_tmp_rv = _mm_set_pd(*info->rv_in[i][1], *info->rv_in[i][0]);
-               // rv out
-               vec_tmp1 = _mm_set_pd(info->buf[i][3][tmpi[3]], info->buf[i][2][tmpi[2]]);
-               lpf = &rv_fc[i];
-               vec_db = _mm_loadu_pd(lpf->db);
-               vec_db = MM_FMA2_PD(MM_LOAD1_PD(&lpf->dc[0]), vec_tmp1, MM_LOAD1_PD(&lpf->dc[1]), vec_db);
-               _mm_storeu_pd(lpf->db, vec_db);
-               vec_tmp1 = MM_FMA2_PD(vec_tmp1, vec_dry, vec_db, vec_wet);
-               vec_mixrv = _mm_add_pd(vec_mixrv, vec_tmp1);
-               _mm_storeu_pd(info->rv_out[i], vec_tmp1);
-               // rv in
-               vec_tmp1 = _mm_add_pd(vec_input_rv, _mm_mul_pd(vec_tmp_rv, MM_LOAD1_PD(&info->rv_feedback[i])));
-               _mm_store_sd(&(info->buf[i][2][tmpi[2]]), vec_tmp1);
-               _mm_store_sd(&(info->buf[i][3][tmpi[3]]), _mm_shuffle_pd(vec_tmp1, vec_tmp1, 0x1));
-               }
-               // er flt
-               vec_dbL = MM_FMA2_PD(vec_dcL0, vec_mixer, vec_dcL1, vec_dbL);
-               vec_mixer = MM_FMA2_PD(vec_mixer, vec_dry, vec_dbL, vec_wet);   
-               // rv delay in
-               _mm_store_pd(&bufrd[tmpi2[REV_EX_RD]], vec_mixer);      
-               // ap
-               vec_index2 = _mm_shuffle_epi32(index2, 0xAA);
-               vtmp[0] = MM_FMA2_PD(vec_mixer, vec_levelap, vec_fbap1, vec_ap_fb);
-               vtmp[1] = MM_FMA2_PD(vec_mixrv, vec_levelap, vec_fbap2, vec_ap_fb);
-               _mm_store_sd(&info->abuf[REV_EX_ER_L1][tmpi2[REV_EX_AP1]], vtmp[0]); 
-               _mm_store_sd(&info->abuf[REV_EX_ER_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[0], vtmp[0], 0x3)); 
-               _mm_store_sd(&info->abuf[REV_EX_RV_L1][tmpi2[REV_EX_AP1]], vtmp[1]); 
-               _mm_store_sd(&info->abuf[REV_EX_RV_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[1], vtmp[0], 0x3)); 
-               for (i = 0; i < REV_EX_AP_MAX; i++) {
-               ALIGN int32 tmpi[4];
-               __m128i vec_index;
-               vec_index = _mm_sub_epi32(vec_index2, _mm_loadu_si128((__m128i *)info->delaya[i]));
-               vec_index = _mm_add_epi32(vec_index, _mm_and_si128(sizeap, _mm_cmplt_epi32(vec_index, _mm_setzero_si128())));
-               _mm_store_si128((__m128i *)&tmpi, vec_index);
-               // ap1
-               vec_mixer = _mm_add_pd(vec_mixer, _mm_set_pd(info->abuf[1][tmpi[1]], info->abuf[0][tmpi[0]]));
-               // ap2
-               vec_mixrv = _mm_add_pd(vec_mixrv, _mm_set_pd(info->abuf[3][tmpi[3]], info->abuf[2][tmpi[2]]));
                }
-               vec_fbap1 = vec_mixer; vec_fbap2 = vec_mixrv;   
                // out
-               vec_hist = _mm_sub_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
+               vec_hist = _mm_add_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
                vec_dbH0 = vec_hist;
                vec_hist = vec_dbH2 = MM_FMA5_PD(vec_dcH0, vec_dbH0, vec_dcH1, vec_dbH1, vec_dcH2, vec_dbH2, vec_dcH3, vec_dbH3, vec_dcH4, vec_dbH4);
                vec_dbH4 = vec_dbH3; vec_dbH3 = vec_dbH2; vec_dbH2 = vec_dbH1; vec_dbH1 = vec_dbH0;
@@ -5787,168 +5289,7 @@ static void do_reverb_ex_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
 }
 
 #else /* floating-point implementation */
-static void do_reverb_ex_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       FLOAT_T leveler = info->leveler, levelrv = info->levelrv, feedback = info->feedback,
-               *rv_feedback = info->rv_feedback, flt_dry = info->flt_dry, flt_wet = info->flt_wet,
-               *dcH = hpf->dc, *dcL = er_fc->dc, st_sprd = info->st_sprd;      
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  hist[2] = {info->hist[0], info->hist[1],}, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2], sprd;
-       // CH_STEREO:
-       RDTSC_TEST1
-       for (k = 0; k < count; k++)
-       {               
-#if !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT)
-               input[0] = buf[k] * info->in_level; input[1] = buf[k + 1] * info->in_level;
-#else
-               input[0] = buf[k]; input[1] = buf[k + 1];
-#endif
-               sprd = (input[0] - input[1]) * st_sprd;
-               input[0] = input[1] = (input[0] + input[1]) * DIV_MIX_LEVEL;
-               input[0] += hist[0] * feedback; input[1] += hist[1] * feedback;
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               //unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               for (i = 0; i < info->unit_num; i++) {
-                       // index inc
-                       int32 *index[4] = {&info->index[i][0], &info->index[i][1], &info->index[i][2], &info->index[i][3],};
-                       if (++(*index[0]) >= info->size[i][0]) {*index[0] = 0;}
-                       if (++(*index[1]) >= info->size[i][1]) {*index[1] = 0;}
-                       if (++(*index[2]) >= info->size[i][2]) {*index[2] = 0;}
-                       if (++(*index[3]) >= info->size[i][3]) {*index[3] = 0;}
-                       // er out
-                       dat_er[0] += info->buf[i][0][*index[0]]; dat_er[1] += info->buf[i][1][*index[1]];
-                       // er in
-                       info->buf[i][0][*index[0]] = input[0]; info->buf[i][1][*index[1]] = input[1];
-                       input[0] += sprd; input[1] -= sprd; // spread
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = info->buf[i][2][*index[2]];
-                       tmp1[1] = info->buf[i][3][*index[3]];
-               //      sample_filter_stereo(&rv_fc1[i], &flt[0], &flt[1]);     
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = lpf->dc[0] * tmp1[0] + lpf->dc[1] * lpf->db[0];
-                       lpf->db[1] = lpf->dc[0] * tmp1[1] + lpf->dc[1] * lpf->db[1];
-                       dat_rv[0] += (info->rv_out[i][0] = tmp1[0] * flt_dry + lpf->db[0] * flt_wet);
-                       dat_rv[1] += (info->rv_out[i][1] = tmp1[1] * flt_dry + lpf->db[1] * flt_wet);
-                       // rv in
-                       info->buf[i][2][*index[2]] = input_rv[0] + tmp_rv[0] * rv_feedback[i];
-                       info->buf[i][3][*index[3]] = input_rv[1] + tmp_rv[1] * rv_feedback[i];
-               }
-       //      sample_filter_stereo(er_fc, &dat_er[0], &dat_er[1]);
-               dbL[0] = dcL[0] * dat_er[0] + dcL[1] * dbL[0];
-               dbL[1] = dcL[0] * dat_er[1] + dcL[1] * dbL[1];
-               dat_er[0] = dat_er[0] * flt_dry + dbL[0] * flt_wet;
-               dat_er[1] = dat_er[1] * flt_dry + dbL[1] * flt_wet;     
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // out
-               hist[0] = dat_rv[0] * levelrv - dat_er[0] * leveler;
-               hist[1] = dat_rv[1] * levelrv - dat_er[1] * leveler;
-       //      sample_filter_stereo(hpf, &hist[0], &hist[1]);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = dcH[0] * dbHL[0] + dcH[1] * dbHL[1] + dcH[2] * dbHL[2] + dcH[3] * dbHL[3] + dcH[4] * dbHL[4];
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = dcH[0] * dbHR[0] + dcH[1] * dbHR[1] + dcH[2] * dbHR[2] + dcH[3] * dbHR[3] + dcH[4] * dbHR[4];
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];
-               buf[k] = hist[0]; buf[++k] = hist[1];
-       }
-       info->hist[0] = hist[0], info->hist[1] = hist[1];
-       RDTSC_TEST2
-}
-
-static void do_reverb_ex_chMS(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       FLOAT_T leveler = info->leveler, levelrv = info->levelrv, feedback = info->feedback,
-               *rv_feedback = info->rv_feedback, flt_dry = info->flt_dry, flt_wet = info->flt_wet,
-               *dcH = hpf->dc, *dcL = er_fc->dc;       
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  hist[2] = {info->hist[0], info->hist[1],}, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2];
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k++)
-       {               
-#if !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT)
-               input[0] = input[1] = (buf[k] + buf[k + 1]) * DIV_MIX_LEVEL * info->in_level;
-#else
-               input[0] = input[1] = (buf[k] + buf[k + 1]) * DIV_MIX_LEVEL;
-#endif
-               input[0] += hist[0] * feedback; input[1] += hist[1] * feedback;
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               //unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               for (i = 0; i < info->unit_num; i++) {
-                       // index inc
-                       int32 *index[4] = {&info->index[i][0], &info->index[i][1], &info->index[i][2], &info->index[i][3],};
-                       if (++(*index[0]) >= info->size[i][0]) {*index[0] = 0;}
-                       if (++(*index[1]) >= info->size[i][1]) {*index[1] = 0;}
-                       if (++(*index[2]) >= info->size[i][2]) {*index[2] = 0;}
-                       if (++(*index[3]) >= info->size[i][3]) {*index[3] = 0;}
-                       // er out
-                       dat_er[0] += info->buf[i][0][*index[0]]; dat_er[1] += info->buf[i][1][*index[1]];
-                       // er in
-                       info->buf[i][0][*index[0]] = input[0]; info->buf[i][1][*index[1]] = input[1];
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = info->buf[i][2][*index[2]];
-                       tmp1[1] = info->buf[i][3][*index[3]];
-               //      sample_filter_stereo(&rv_fc1[i], &flt[0], &flt[1]);     
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = lpf->dc[0] * tmp1[0] + lpf->dc[1] * lpf->db[0];
-                       lpf->db[1] = lpf->dc[0] * tmp1[1] + lpf->dc[1] * lpf->db[1];
-                       dat_rv[0] += (info->rv_out[i][0] = tmp1[0] * flt_dry + lpf->db[0] * flt_wet);
-                       dat_rv[1] += (info->rv_out[i][1] = tmp1[1] * flt_dry + lpf->db[1] * flt_wet);
-                       // rv in
-                       info->buf[i][2][*index[2]] = input_rv[0] + tmp_rv[0] * rv_feedback[i];
-                       info->buf[i][3][*index[3]] = input_rv[1] + tmp_rv[1] * rv_feedback[i];
-               }
-       //      sample_filter_stereo(er_fc, &dat_er[0], &dat_er[1]);
-               dbL[0] = dcL[0] * dat_er[0] + dcL[1] * dbL[0];
-               dbL[1] = dcL[0] * dat_er[1] + dcL[1] * dbL[1];
-               dat_er[0] = dat_er[0] * flt_dry + dbL[0] * flt_wet;
-               dat_er[1] = dat_er[1] * flt_dry + dbL[1] * flt_wet;             
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // out
-               hist[0] = dat_rv[0] * levelrv - dat_er[0] * leveler;
-               hist[1] = dat_rv[1] * levelrv - dat_er[1] * leveler;
-       //      sample_filter_stereo(hpf, &hist[0], &hist[1]);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = dcH[0] * dbHL[0] + dcH[1] * dbHL[1] + dcH[2] * dbHL[2] + dcH[3] * dbHL[3] + dcH[4] * dbHL[4];
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = dcH[0] * dbHR[0] + dcH[1] * dbHR[1] + dcH[2] * dbHR[2] + dcH[3] * dbHR[3] + dcH[4] * dbHR[4];
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];
-               buf[k] = hist[0]; buf[++k] = hist[1];
-       }
-       info->hist[0] = hist[0], info->hist[1] = hist[1];
-}
-
-static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
+static void do_reverb_ex_chSTMS(DATA_T *buf, int32 count, InfoReverbEX *info)
 {
        int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
        FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
@@ -6013,12 +5354,13 @@ static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
                // rv delay in
                bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
                // ap
+               if(ext_reverb_ex_ap_num){
                if ((++(*indexap)) >= sizeap) {*indexap -= sizeap;}
                info->abuf[0][*indexap] = dat_er[0] * levelap + fb_ap1[0] * REV_EX_AP_FB; 
                info->abuf[1][*indexap] = dat_er[1] * levelap + fb_ap1[1] * REV_EX_AP_FB; 
                info->abuf[2][*indexap] = dat_rv[0] * levelap + fb_ap2[0] * REV_EX_AP_FB; 
                info->abuf[3][*indexap] = dat_rv[1] * levelap + fb_ap2[1] * REV_EX_AP_FB; 
-               for (i = 0; i < REV_EX_AP_MAX; i++) {
+               for (i = 0; i < ext_reverb_ex_ap_num; i++) {
                        int32 index;
                        // ap1 er
                        if((index = *indexap - info->delaya[i][0]) < 0) {index += sizeap;} 
@@ -6033,9 +5375,10 @@ static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
                }
                fb_ap1[0] = dat_er[0]; fb_ap1[1] = dat_er[1];
                fb_ap2[0] = dat_rv[0]; fb_ap2[1] = dat_rv[1];
+               }
                // out
-               hist[0] = dat_rv[0] * levelrv - dat_er[0] * leveler;
-               hist[1] = dat_rv[1] * levelrv - dat_er[1] * leveler;
+               hist[0] = dat_rv[0] * levelrv + dat_er[0] * leveler;
+               hist[1] = dat_rv[1] * levelrv + dat_er[1] * leveler;
        //      sample_filter_stereo(hpf, &hist[0], &hist[1]);
                dbHL[0] = hist[0];      
                hist[0] = dbHL[2] = dcH[0] * dbHL[0] + dcH[1] * dbHL[1] + dcH[2] * dbHL[2] + dcH[3] * dbHL[3] + dcH[4] * dbHL[4];
@@ -6055,114 +5398,10 @@ static void do_reverb_ex_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
        RDTSC_TEST2
 }
 
-static void do_reverb_ex_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       FLOAT_T leveler = info->leveler, levelrv = info->levelrv, feedback = info->feedback,
-               *rv_feedback = info->rv_feedback, flt_dry = info->flt_dry, flt_wet = info->flt_wet,
-               *dcH = hpf->dc, *dcL = er_fc->dc;       
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  hist[2] = {info->hist[0], info->hist[1],}, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2];
-       int32 *indexap = &info->index2[REV_EX_AP1], sizeap = info->size2[REV_EX_AP1];
-       DATA_T *fb_ap1 = info->fb_ap1, *fb_ap2 = info->fb_ap2;
-       FLOAT_T levelap = info->levelap;
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k++)
-       {               
-#if !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT)
-               input[0] = input[1] = (buf[k] + buf[k + 1]) * DIV_MIX_LEVEL * info->in_level;
-#else
-               input[0] = input[1] = (buf[k] + buf[k + 1]) * DIV_MIX_LEVEL;
-#endif
-               input[0] += hist[0] * feedback; input[1] += hist[1] * feedback;
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               //unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               for (i = 0; i < info->unit_num; i++) {
-                       // index inc
-                       int32 *index[4] = {&info->index[i][0], &info->index[i][1], &info->index[i][2], &info->index[i][3],};
-                       if (++(*index[0]) >= info->size[i][0]) {*index[0] = 0;}
-                       if (++(*index[1]) >= info->size[i][1]) {*index[1] = 0;}
-                       if (++(*index[2]) >= info->size[i][2]) {*index[2] = 0;}
-                       if (++(*index[3]) >= info->size[i][3]) {*index[3] = 0;}
-                       // er out
-                       dat_er[0] += info->buf[i][0][*index[0]]; dat_er[1] += info->buf[i][1][*index[1]];
-                       // er in
-                       info->buf[i][0][*index[0]] = input[0]; info->buf[i][1][*index[1]] = input[1];
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = info->buf[i][2][*index[2]];
-                       tmp1[1] = info->buf[i][3][*index[3]];
-               //      sample_filter_stereo(&rv_fc1[i], &flt[0], &flt[1]);     
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = lpf->dc[0] * tmp1[0] + lpf->dc[1] * lpf->db[0];
-                       lpf->db[1] = lpf->dc[0] * tmp1[1] + lpf->dc[1] * lpf->db[1];
-                       dat_rv[0] += (info->rv_out[i][0] = tmp1[0] * flt_dry + lpf->db[0] * flt_wet);
-                       dat_rv[1] += (info->rv_out[i][1] = tmp1[1] * flt_dry + lpf->db[1] * flt_wet);
-                       // rv in
-                       info->buf[i][2][*index[2]] = input_rv[0] + tmp_rv[0] * rv_feedback[i];
-                       info->buf[i][3][*index[3]] = input_rv[1] + tmp_rv[1] * rv_feedback[i];
-               }
-       //      sample_filter_stereo(er_fc, &dat_er[0], &dat_er[1]);
-               dbL[0] = dcL[0] * dat_er[0] + dcL[1] * dbL[0];
-               dbL[1] = dcL[0] * dat_er[1] + dcL[1] * dbL[1];
-               dat_er[0] = dat_er[0] * flt_dry + dbL[0] * flt_wet;
-               dat_er[1] = dat_er[1] * flt_dry + dbL[1] * flt_wet;             
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // ap
-               if ((++(*indexap)) >= sizeap) {*indexap -= sizeap;}
-               info->abuf[0][*indexap] = dat_er[0] * levelap + fb_ap1[0] * REV_EX_AP_FB; 
-               info->abuf[1][*indexap] = dat_er[1] * levelap + fb_ap1[1] * REV_EX_AP_FB; 
-               info->abuf[2][*indexap] = dat_rv[0] * levelap + fb_ap2[0] * REV_EX_AP_FB; 
-               info->abuf[3][*indexap] = dat_rv[1] * levelap + fb_ap2[1] * REV_EX_AP_FB; 
-               for (i = 0; i < REV_EX_AP_MAX; i++) {
-                       int32 index;
-                       // ap1 er
-                       if((index = *indexap - info->delaya[i][0]) < 0) {index += sizeap;} 
-                       dat_er[0] += info->abuf[0][index]; 
-                       if((index = *indexap - info->delaya[i][1]) < 0) {index += sizeap;}
-                       dat_er[1] += info->abuf[1][index];      
-                       // ap2 rv
-                       if((index = *indexap - info->delaya[i][2]) < 0) {index += sizeap;}
-                       dat_rv[0] += info->abuf[2][index]; 
-                       if((index = *indexap - info->delaya[i][3]) < 0) {index += sizeap;}
-                       dat_rv[1] += info->abuf[3][index]; 
-               }
-               fb_ap1[0] = dat_er[0]; fb_ap1[1] = dat_er[1];
-               fb_ap2[0] = dat_rv[0]; fb_ap2[1] = dat_rv[1];
-               // out
-               hist[0] = dat_rv[0] * levelrv - dat_er[0] * leveler;
-               hist[1] = dat_rv[1] * levelrv - dat_er[1] * leveler;
-       //      sample_filter_stereo(hpf, &hist[0], &hist[1]);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = dcH[0] * dbHL[0] + dcH[1] * dbHL[1] + dcH[2] * dbHL[2] + dcH[3] * dbHL[3] + dcH[4] * dbHL[4];
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = dcH[0] * dbHR[0] + dcH[1] * dbHR[1] + dcH[2] * dbHR[2] + dcH[3] * dbHR[3] + dcH[4] * dbHR[4];
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];
-               buf[k] = hist[0]; buf[++k] = hist[1];
-       }
-       info->hist[0] = hist[0], info->hist[1] = hist[1];
-}
 #endif
 
 
-static void do_reverb_ex_mod_chST(DATA_T *buf, int32 count, InfoReverbEX *info);
-static void do_reverb_ex_mod_chMS(DATA_T *buf, int32 count, InfoReverbEX *info);
-static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info);
-static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info);
+static void do_reverb_ex_mod_chSTMS(DATA_T *buf, int32 count, InfoReverbEX *info);
 
 static int rev_ex_mod_init_delay(InfoReverbEX *info, int type, int num, int32 size)
 {
@@ -6196,7 +5435,13 @@ static void init_reverb_ex_mod(InfoReverbEX *info)
        num_div2 = num / 2;
        div_num = 1.0 / (FLOAT_T)num;
        norm = pow(div_num, DIV_3_2);
-       ext_reverb_ex_ap_num = ext_reverb_ex_ap_num ? REV_EX_AP_MAX : 0;
+       if(ext_reverb_ex_ap_num >= REV_EX_AP_MAX)
+               ext_reverb_ex_ap_num = REV_EX_AP_MAX;
+       else if(ext_reverb_ex_ap_num >= 1)
+               ext_reverb_ex_ap_num = 4;
+       else 
+               ext_reverb_ex_ap_num = 0;
+       //ext_reverb_ex_ap_num = ext_reverb_ex_ap_num ? REV_EX_AP_MAX : 0;
        // init
        init_prime_list();
        pdelay_cnt = info->er_time_ms * playmode_rate_ms;       
@@ -6341,7 +5586,7 @@ static void init_reverb_ex_mod(InfoReverbEX *info)
        set_sample_filter_type(&info->hpf, FILTER_NONE);
        init_sample_filter(&info->hpf, REV_EX_HPF_FREQ, 0, FILTER_HPF_BW);      
        info->unit_num = num;
-       info->st_sprd = div_num; // L+,R-
+       info->st_sprd = (info->mode == CH_STEREO) ? div_num : 0.0; // L+,R-
        info->flt_wet = (info->rev_damp_bal + 1.0) * DIV_2;
        info->flt_dry = 1.0 - info->flt_wet;
        info->feedback = info->rev_feedback * REV_EX_FEEDBACK;
@@ -6384,422 +5629,502 @@ static void init_reverb_ex_mod(InfoReverbEX *info)
        // func
        switch(info->mode){
        case CH_STEREO:
-               info->do_reverb_mode = ext_reverb_ex_ap_num ? do_reverb_ex_mod_chST_ap8 : do_reverb_ex_mod_chST;
-               break;
        case CH_MIX_STEREO:
-               info->do_reverb_mode = ext_reverb_ex_ap_num ? do_reverb_ex_mod_chMS_ap8 : do_reverb_ex_mod_chMS;
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)        
+               if(!set_effect_sub_thread(do_reverb_ex_mod_chSTMS_thread1, info, 2)){
+                       info->thread = 1;
+                       info->do_reverb_mode = do_reverb_ex_mod_chSTMS_thread;
+                       break;
+               }else
+                       info->thread = 0;
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               info->do_reverb_mode = do_reverb_ex_mod_chSTMS;
                break;
        default:
                info->do_reverb_mode = do_reverb_ex_none;
                break;
+       }       
+       
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)        
+       if(info->thread){
+               int32 bytes = compute_buffer_size * 2 * sizeof(DATA_T);
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               if(info->tobuf != NULL){
+                       aligned_free(info->tobuf);
+                       info->tobuf = NULL;
+               }
+               info->tobuf = (DATA_T *) aligned_malloc(bytes, ALIGN_SIZE);
+#else
+               if(info->tobuf != NULL){
+                       safe_free(info->tobuf);
+                       info->tobuf = NULL;
+               }
+               info->tobuf = (DATA_T *) safe_large_malloc(bytes);
+#endif
+               memset(info->tobuf, 0, bytes);
+
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               // swap REV_EX_ER_R1 <-> REV_EX_RV_L1
+               // ER_L1,ER_R1,RV_L1,RV_R1 -> ER_L1,RV_L1,ER_R1,RV_R1
+               for(i = 0; i < ext_reverb_ex_rv_num; i++){
+                       FLOAT_T tmp1, tmp2;
+                       tmp1 = info->mcount[i][REV_EX_ER_R1];
+                       tmp2 = info->mcount[i][REV_EX_RV_L1];
+                       info->mcount[i][REV_EX_ER_R1] = tmp2;
+                       info->mcount[i][REV_EX_RV_L1] = tmp1;
+                       tmp1 = info->mrate[i][REV_EX_ER_R1];
+                       tmp2 = info->mrate[i][REV_EX_RV_L1];
+                       info->mrate[i][REV_EX_ER_R1] = tmp2;
+                       info->mrate[i][REV_EX_RV_L1] = tmp1;
+                       tmp1 = info->mdelay[i][REV_EX_ER_R1];
+                       tmp2 = info->mdelay[i][REV_EX_RV_L1];
+                       info->mdelay[i][REV_EX_ER_R1] = tmp2;
+                       info->mdelay[i][REV_EX_RV_L1] = tmp1;
+                       tmp1 = info->mdepth[i][REV_EX_ER_R1];
+                       tmp2 = info->mdepth[i][REV_EX_RV_L1];
+                       info->mdepth[i][REV_EX_ER_R1] = tmp2;
+                       info->mdepth[i][REV_EX_RV_L1] = tmp1;
+                       tmp1 = info->mphase[i][REV_EX_ER_R1];
+                       tmp2 = info->mphase[i][REV_EX_RV_L1];
+                       info->mphase[i][REV_EX_ER_R1] = tmp2;
+                       info->mphase[i][REV_EX_RV_L1] = tmp1;
+               }               
+               if(ext_reverb_ex_ap_num){
+               for (i = 0; i < ext_reverb_ex_ap_num; i++) {
+                       FLOAT_T tmp1, tmp2;
+                       tmp1 = info->acount[i][REV_EX_ER_R1];
+                       tmp2 = info->acount[i][REV_EX_RV_L1];
+                       info->acount[i][REV_EX_ER_R1] = tmp2;
+                       info->acount[i][REV_EX_RV_L1] = tmp1;
+                       tmp1 = info->arate[i][REV_EX_ER_R1];
+                       tmp2 = info->arate[i][REV_EX_RV_L1];
+                       info->arate[i][REV_EX_ER_R1] = tmp2;
+                       info->arate[i][REV_EX_RV_L1] = tmp1;
+                       tmp1 = info->adelay[i][REV_EX_ER_R1];
+                       tmp2 = info->adelay[i][REV_EX_RV_L1];
+                       info->adelay[i][REV_EX_ER_R1] = tmp2;
+                       info->adelay[i][REV_EX_RV_L1] = tmp1;
+                       tmp1 = info->adepth[i][REV_EX_ER_R1];
+                       tmp2 = info->adepth[i][REV_EX_RV_L1];
+                       info->adepth[i][REV_EX_ER_R1] = tmp2;
+                       info->adepth[i][REV_EX_RV_L1] = tmp1;
+                       tmp1 = info->aphase[i][REV_EX_ER_R1];
+                       tmp2 = info->aphase[i][REV_EX_RV_L1];
+                       info->aphase[i][REV_EX_ER_R1] = tmp2;
+                       info->aphase[i][REV_EX_RV_L1] = tmp1;
+               }
+               }
+#endif
+               info->tcount = 0;
+               info->index2t[0] = info->index2[0];
+               info->index2t[1] = info->index2[1];
+               info->index2t[2] = info->index2[2];
+               info->index2t[3] = info->index2[3];
        }
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)  
+
        if(error) info->do_reverb_mode = do_reverb_ex_none; // safe
        info->init = 1;
 }
 
-#if (OPT_MODE == 1) && !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT) /* fixed-point implementation */
-static void do_reverb_ex_mod_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
+
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)        
+static void do_reverb_ex_mod_chSTMS_thread(DATA_T *buf, int32 count, InfoReverbEX *info)
 {
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       int32 *mindex = &info->index2[REV_EX_UNIT], msize = info->size2[REV_EX_UNIT];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       int32 leveler = info->leveleri, levelrv = info->levelrvi, feedback = info->feedbacki,
-               *rv_feedback = info->rv_feedbacki, flt_dry = info->flt_dryi, flt_wet = info->flt_weti,
-               *dcH = hpf->dc, *dcL = er_fc->dc, in_level = info->in_leveli, st_sprd = info->st_sprdi;
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[8], *dbL = er_fc->db; 
-       DATA_T *hist = info->hist, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2], sprd;
-       FLOAT_T mindexf;
-       // CH_STEREO:
-       for (k = 0; k < count; k++)
-       {               
-               input[0] = buf[k]; input[1] = buf[k + 1];
-               sprd = imuldiv24((input[0] - input[1]), st_sprd);
-               input[0] = input[1] = imuldiv24((input[0] + input[1]), in_level);
-               input[0] += imuldiv24(hist[0], feedback); input[1] += imuldiv24(hist[1], feedback);
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               // unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               if((++*mindex) >= msize) {*mindex = 0;}
-               mindexf = *mindex;
-               for (i = 0; i < info->unit_num; i++) {
-                       int32 index[4];
-                       FLOAT_T v1[4], v2[4];
-                       FLOAT_T fp1[4], fp2[4]; 
-                       // lfo
-                       info->mcount[i][REV_EX_ER_L1] += info->mrate[i][REV_EX_ER_L1];
-                       info->mcount[i][REV_EX_ER_L1] -= floor(info->mcount[i][REV_EX_ER_L1]);
-                       info->mcount[i][REV_EX_ER_R1] += info->mrate[i][REV_EX_ER_R1];
-                       info->mcount[i][REV_EX_ER_R1] -= floor(info->mcount[i][REV_EX_ER_R1]);
-                       info->mcount[i][REV_EX_RV_L1] += info->mrate[i][REV_EX_RV_L1];
-                       info->mcount[i][REV_EX_RV_L1] -= floor(info->mcount[i][REV_EX_RV_L1]);
-                       info->mcount[i][REV_EX_RV_R1] += info->mrate[i][REV_EX_RV_R1];
-                       info->mcount[i][REV_EX_RV_R1] -= floor(info->mcount[i][REV_EX_RV_R1]);
-                       fp1[0] = mindexf - info->mdelay[i][REV_EX_ER_L1] - info->mdepth[i][REV_EX_ER_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_L1] + info->mphase[i][REV_EX_ER_L1]);        
-                       fp1[1] = mindexf - info->mdelay[i][REV_EX_ER_R1] - info->mdepth[i][REV_EX_ER_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_R1] + info->mphase[i][REV_EX_ER_R1]);        
-                       fp1[2] = mindexf - info->mdelay[i][REV_EX_RV_L1] - info->mdepth[i][REV_EX_RV_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_L1] + info->mphase[i][REV_EX_RV_L1]);        
-                       fp1[3] = mindexf - info->mdelay[i][REV_EX_RV_R1] - info->mdepth[i][REV_EX_RV_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_R1] + info->mphase[i][REV_EX_RV_R1]);        
-                       if(fp1[0] < 0) {fp1[0] += msize;}
-                       if(fp1[1] < 0) {fp1[1] += msize;}               
-                       if(fp1[2] < 0) {fp1[2] += msize;}
-                       if(fp1[3] < 0) {fp1[3] += msize;}                       
-                       fp2[0] = floor(fp1[0]); index[0] = fp2[0]; 
-                       fp2[1] = floor(fp1[1]); index[1] = fp2[1];
-                       fp2[2] = floor(fp1[2]); index[2] = fp2[2]; 
-                       fp2[3] = floor(fp1[3]); index[3] = fp2[3]; 
-                       v1[0] = info->buf[i][REV_EX_ER_L1][index[0]]; v2[0] = info->buf[i][REV_EX_ER_L1][index[0] + 1];
-                       v1[1] = info->buf[i][REV_EX_ER_R1][index[1]]; v2[1] = info->buf[i][REV_EX_ER_R1][index[1] + 1];
-                       v1[2] = info->buf[i][REV_EX_RV_L1][index[2]]; v2[2] = info->buf[i][REV_EX_RV_L1][index[2] + 1];
-                       v1[3] = info->buf[i][REV_EX_RV_R1][index[3]]; v2[3] = info->buf[i][REV_EX_RV_R1][index[3] + 1];
-                       // er out
-                       dat_er[0] += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
-                       dat_er[1] += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
-                       // er in
-                       info->buf[i][0][*mindex] = input[0]; info->buf[i][1][*mindex] = input[1];
-                       input[0] += sprd; input[1] -= sprd; // spread
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = v1[2] + (v2[2] - v1[2]) * (fp1[2] - fp2[2]); // linear interpolation
-                       tmp1[1] = v1[3] + (v2[3] - v1[3]) * (fp1[3] - fp2[3]); // linear interpolation
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = imuldiv28(tmp1[0], lpf->dc[0]) + imuldiv28(lpf->db[0], lpf->dc[1]);
-                       lpf->db[1] = imuldiv28(tmp1[1], lpf->dc[0]) + imuldiv28(lpf->db[1], lpf->dc[1]);
-                       dat_rv[0] += (info->rv_out[i][0] = imuldiv24(tmp1[0], flt_dry) + imuldiv24(lpf->db[0], flt_wet));
-                       dat_rv[1] += (info->rv_out[i][1] = imuldiv24(tmp1[1], flt_dry) + imuldiv24(lpf->db[1], flt_wet));
-                       // rv in
-                       info->buf[i][2][*mindex] = input_rv[0] + imuldiv24(tmp_rv[0], rv_feedback[i]);
-                       info->buf[i][3][*mindex] = input_rv[1] + imuldiv24(tmp_rv[1], rv_feedback[i]);
-
-                       if(*mindex == 0){
-                               info->buf[i][0][msize] = info->buf[i][0][0];
-                               info->buf[i][1][msize] = info->buf[i][1][0];
-                               info->buf[i][2][msize] = info->buf[i][2][0];
-                               info->buf[i][3][msize] = info->buf[i][3][0];
-                       }
+       int32 i;
+       if(info->thread){
+               info->tcount = count;
+               info->tibuf = buf; //in
+               go_effect_sub_thread(do_reverb_ex_mod_chSTMS_thread1, info, 2);
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               for (i = 0; i < count; i += 2){         
+                       _mm_store_pd(&buf[i], _mm_load_pd(&info->tobuf[i])); // out
                }
-               // er flt
-               dbL[0] = imuldiv28(dat_er[0], dcL[0]) + imuldiv28(dbL[0], dcL[1]);
-               dbL[1] = imuldiv28(dat_er[1], dcL[0]) + imuldiv28(dbL[1], dcL[1]);
-               dat_er[0] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[0], flt_wet);
-               dat_er[1] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[1], flt_wet);
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // out
-               hist[0] = imuldiv16(dat_rv[0], levelrv) - imuldiv16(dat_er[0], leveler);
-               hist[1] = imuldiv16(dat_rv[1], levelrv) - imuldiv16(dat_er[1], leveler);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = imuldiv28(dbHL[0], dcH[0]) + imuldiv28(dbHL[1], dcH[1]) + imuldiv28(dbHL[2], dcH[2])
-                       - imuldiv28(dbHL[3], dcH[3]) - imuldiv28(dbHL[4], dcH[4]);
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = imuldiv28(dbHR[0], dcH[0]) + imuldiv28(dbHR[1], dcH[1]) + imuldiv28(dbHR[2], dcH[2])
-                       - imuldiv28(dbHR[3], dcH[3]) - imuldiv28(dbHR[4], dcH[4]);
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];              
-               buf[k] = hist[0]; buf[++k] = hist[1];
+#else
+               for (i = 0; i < count; i++){                    
+                       buf[i] = info->tbuf[i]; 
+                       i++;
+                       buf[i] = info->tbuf[i]; 
+               }
+#endif
+               return;
        }
 }
 
-static void do_reverb_ex_mod_chMS(DATA_T *buf, int32 count, InfoReverbEX *info)
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)     
+static void do_reverb_ex_mod_chSTMS_thread1(int thread_num, void *info2) // per-channel reverb worker (__m128d variant): thread_num 0 processes L, 1 processes R; info2 is cast to InfoReverbEX*
 {
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       int32 *mindex = &info->index2[REV_EX_UNIT], msize = info->size2[REV_EX_UNIT];
+       InfoReverbEX *info;
+       int32 i, k = 0;
+       DATA_T *ibuf, *obuf;
+       int32 chofs0, chofs2, chofslfo;
+       FILTER_T *dbH;
+       DATA_T in[2], *pin[2], hist;
+       __m128d vec_fbap; 
+       __m128i index2;
+               
+       if(thread_num >= 2) // only thread numbers 0 and 1 do any work here
+               return;
+       if(!info2)
+               return;
+       info = (InfoReverbEX *)info2;
+       if(!info->init) // nothing is processed while info->init is zero
+               return;
+       ibuf = info->tibuf; // input, read as pairs ibuf[k], ibuf[k + 1]
+       obuf = info->tobuf; // output, this worker writes only obuf[k + chofs0]
+       if(thread_num == 0){ // L
+               chofs0 = 0; // channel slot: er buffer, bufrd/obuf offset, filter state
+               chofs2 = 2; // rv buffer of this channel
+               chofslfo = 0; // first element of the LFO pair loaded from mcount/mrate/mphase/mdepth/mdelay (and the a* arrays)
+               dbH = &info->hpf.db[0];
+               pin[0] = &in[0];
+               pin[1] = &in[1];
+               hist = info->hist[0];
+               vec_fbap = _mm_loadu_pd(info->fb_ap1);
+               index2 = _mm_loadu_si128((__m128i *)info->index2); // L keeps its buffer indices in info->index2
+       }else if(thread_num == 1){ // R
+               chofs0 = 1;
+               chofs2 = 3;
+               chofslfo = 2;
+               dbH = &info->hpf.db[5];
+               pin[0] = &in[1]; // pin[] is swapped for R, so (*pin[0] - *pin[1]) yields the opposite sign of sprd
+               pin[1] = &in[0];
+               hist = info->hist[1];
+               vec_fbap = _mm_loadu_pd(info->fb_ap2);
+               index2 = _mm_loadu_si128((__m128i *)info->index2t); // R uses its own index copy, info->index2t
+       }else
+               return; 
+       {
+       DATA_T *bufrd = info->buf2[REV_EX_RD];
+       int32 mindex, msize = info->size2[REV_EX_UNIT], asize = info->size2[REV_EX_AP1];        
        FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       int32 leveler = info->leveleri, levelrv = info->levelrvi, feedback = info->feedbacki,
-               *rv_feedback = info->rv_feedbacki, flt_dry = info->flt_dryi, flt_wet = info->flt_weti,
-               *dcH = hpf->dc, *dcL = er_fc->dc, in_level = info->in_leveli;
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  *hist = info->hist, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2];
-       FLOAT_T mindexf;
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k++)
+       FILTER_T *dbL = er_fc->db;
+       FLOAT_T leveler = info->leveler, levelrv = info->levelrv, feedback = info->feedback,
+               levelap = info->levelap, st_sprd = info->st_sprd,
+               *rv_feedback = info->rv_feedback, flt_dry = info->flt_dry, flt_wet = info->flt_wet,
+               *dcH = hpf->dc, *dcL = er_fc->dc;       
+       DATA_T input, sprd, input_rv, dat_er, dat_rv, tmp_rv, tmp1;
+       __m128d vmsize = _mm_set1_pd(info->size2[REV_EX_UNIT]), vmi;
+       __m128d vasize = _mm_set1_pd(info->size2[REV_EX_AP1]), vai;
+       __m128i add_idx2 = _mm_set_epi32(1, 1, 2, 1), size2 = _mm_loadu_si128((__m128i *)info->size2); // per-frame index step: lane 1 += 2, lanes 0/2/3 += 1; size2 holds the wrap limits
+       __m128d vec_ap_fb = _mm_set1_pd(REV_EX_AP_FB), vec_levelap = MM_LOAD1_PD(&info->levelap);
+       
+       for (k = 0; k < info->tcount; k += 2) // one input pair (ibuf[k], ibuf[k + 1]) per iteration
        {               
-               input[0] = input[1] = imuldiv24((buf[k] + buf[k + 1]), info->in_level);
-               input[0] += imuldiv24(hist[0], feedback); input[1] += imuldiv24(hist[1], feedback);
+               __m128d vvtmp[2];
+               ALIGN int32 tmpi2[4];
+#if !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT)
+               in[0] = ibuf[k] * info->in_level; in[1] = ibuf[k + 1] * info->in_level;
+#else
+               in[0] = ibuf[k]; in[1] = ibuf[k + 1];
+#endif
+               sprd = (*pin[0] - *pin[1]) * st_sprd;
+               input = (*pin[0] + *pin[1]) * DIV_MIX_LEVEL;
+               input += hist * feedback;
+               // advance the buffer indices held in index2 (unit, rv delay, ap)
+               index2 = _mm_add_epi32(index2, add_idx2);
+               index2 = _mm_and_si128(index2, _mm_cmplt_epi32(index2, size2)); // a lane that reached its size is masked to 0 (wrap)
+               _mm_store_si128((__m128i *)&tmpi2, index2); // spill the lanes to tmpi2 for scalar indexing
                // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               // unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               if((++*mindex) >= msize) {*mindex = 0;}
-               mindexf = *mindex;
+               input_rv = bufrd[tmpi2[REV_EX_RD] + chofs0];
+               //unit
+               dat_er = 0; dat_rv = 0;
+               vmi = _mm_cvtepi32_pd(_mm_shuffle_epi32(index2, 0x0)); // lane 0 of index2 broadcast, then converted to double
+               mindex = _mm_cvtsi128_si32(index2); // lane 0 of index2 as int
                for (i = 0; i < info->unit_num; i++) {
-                       int32 index[4];
-                       FLOAT_T v1[4], v2[4];
-                       FLOAT_T fp1[4], fp2[4]; 
-                       // lfo
-                       info->mcount[i][REV_EX_ER_L1] += info->mrate[i][REV_EX_ER_L1];
-                       info->mcount[i][REV_EX_ER_L1] -= floor(info->mcount[i][REV_EX_ER_L1]);
-                       info->mcount[i][REV_EX_ER_R1] += info->mrate[i][REV_EX_ER_R1];
-                       info->mcount[i][REV_EX_ER_R1] -= floor(info->mcount[i][REV_EX_ER_R1]);
-                       info->mcount[i][REV_EX_RV_L1] += info->mrate[i][REV_EX_RV_L1];
-                       info->mcount[i][REV_EX_RV_L1] -= floor(info->mcount[i][REV_EX_RV_L1]);
-                       info->mcount[i][REV_EX_RV_R1] += info->mrate[i][REV_EX_RV_R1];
-                       info->mcount[i][REV_EX_RV_R1] -= floor(info->mcount[i][REV_EX_RV_R1]);
-                       fp1[0] = mindexf - info->mdelay[i][REV_EX_ER_L1] - info->mdepth[i][REV_EX_ER_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_L1] + info->mphase[i][REV_EX_ER_L1]);        
-                       fp1[1] = mindexf - info->mdelay[i][REV_EX_ER_R1] - info->mdepth[i][REV_EX_ER_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_R1] + info->mphase[i][REV_EX_ER_R1]);        
-                       fp1[2] = mindexf - info->mdelay[i][REV_EX_RV_L1] - info->mdepth[i][REV_EX_RV_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_L1] + info->mphase[i][REV_EX_RV_L1]);        
-                       fp1[3] = mindexf - info->mdelay[i][REV_EX_RV_R1] - info->mdepth[i][REV_EX_RV_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_R1] + info->mphase[i][REV_EX_RV_R1]);        
-                       if(fp1[0] < 0) {fp1[0] += msize;}
-                       if(fp1[1] < 0) {fp1[1] += msize;}               
-                       if(fp1[2] < 0) {fp1[2] += msize;}
-                       if(fp1[3] < 0) {fp1[3] += msize;}                       
-                       fp2[0] = floor(fp1[0]); index[0] = fp2[0]; 
-                       fp2[1] = floor(fp1[1]); index[1] = fp2[1];
-                       fp2[2] = floor(fp1[2]); index[2] = fp2[2]; 
-                       fp2[3] = floor(fp1[3]); index[3] = fp2[3]; 
-                       v1[0] = info->buf[i][REV_EX_ER_L1][index[0]]; v2[0] = info->buf[i][REV_EX_ER_L1][index[0] + 1];
-                       v1[1] = info->buf[i][REV_EX_ER_R1][index[1]]; v2[1] = info->buf[i][REV_EX_ER_R1][index[1] + 1];
-                       v1[2] = info->buf[i][REV_EX_RV_L1][index[2]]; v2[2] = info->buf[i][REV_EX_RV_L1][index[2] + 1];
-                       v1[3] = info->buf[i][REV_EX_RV_R1][index[3]]; v2[3] = info->buf[i][REV_EX_RV_R1][index[3] + 1];
-                       // er out
-                       dat_er[0] += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
-                       dat_er[1] += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
-                       // er in
-                       info->buf[i][0][*mindex] = input[0]; info->buf[i][1][*mindex] = input[1];
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = v1[2] + (v2[2] - v1[2]) * (fp1[2] - fp2[2]); // linear interpolation
-                       tmp1[1] = v1[3] + (v2[3] - v1[3]) * (fp1[3] - fp2[3]); // linear interpolation
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = imuldiv28(tmp1[0], lpf->dc[0]) + imuldiv28(lpf->db[0], lpf->dc[1]);
-                       lpf->db[1] = imuldiv28(tmp1[1], lpf->dc[0]) + imuldiv28(lpf->db[1], lpf->dc[1]);
-                       dat_rv[0] += (info->rv_out[i][0] = imuldiv24(tmp1[0], flt_dry) + imuldiv24(lpf->db[0], flt_wet));
-                       dat_rv[1] += (info->rv_out[i][1] = imuldiv24(tmp1[1], flt_dry) + imuldiv24(lpf->db[1], flt_wet));
-                       // rv in
-                       info->buf[i][2][*mindex] = input_rv[0] + imuldiv24(tmp_rv[0], rv_feedback[i]);
-                       info->buf[i][3][*mindex] = input_rv[1] + imuldiv24(tmp_rv[1], rv_feedback[i]);
-
-                       if(*mindex == 0){
-                               info->buf[i][0][msize] = info->buf[i][0][0];
-                               info->buf[i][1][msize] = info->buf[i][1][0];
-                               info->buf[i][2][msize] = info->buf[i][2][0];
-                               info->buf[i][3][msize] = info->buf[i][3][0];
-                       }
+               __m128d vc, vr, vd, vfp, vv1, vv2, vtmp[2];
+               __m128i vindex;                 
+               // lfo
+               vc = _mm_add_pd(_mm_loadu_pd(&info->mcount[i][chofslfo]), _mm_loadu_pd(&info->mrate[i][chofslfo])); // mcount+mrate
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1
+               vc = _mm_sub_pd(vc, _mm_floor_pd(vc)); // mcount-=floor(mcount)
+#else
+               vc = _mm_sub_pd(vc, _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc))); // mcount-=(int)(mcount); truncation equals floor for non-negative values only
+#endif
+               _mm_storeu_pd(&info->mcount[i][chofslfo], vc);
+               vr = _mm_add_pd(vc, _mm_loadu_pd(&info->mphase[i][chofslfo])); // mcount+mphase
+               vd = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr,1)), lookup2_sine_p(MM_EXTRACT_F64(vr,0))); // lookup2_sine_p(mc)
+               vd = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][chofslfo]), vd); // mdepth* sine
+               vfp = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][chofslfo])), vd); // mindex-mdelay-mdepth
+               vfp = _mm_add_pd(vfp, _mm_and_pd(vmsize, _mm_cmplt_pd(vfp, _mm_setzero_pd()))); // fp<0 ? fp+msize
+               vindex = _mm_cvttpd_epi32(vfp); // truncating convert; equals (int)floor(fp) assuming fp >= 0 after the wrap above
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 floor
+               vfp = _mm_sub_pd(vfp, _mm_floor_pd(vfp)); // fp-floor(fp)
+#else
+               vfp = _mm_sub_pd(vfp, _mm_cvtepi32_pd(vindex)); // fp-vindex
+#endif
+               vtmp[0] = _mm_loadu_pd(&info->buf[i][chofs0][MM_EXTRACT_I32(vindex,0)]); // er buffer: v1 (at index) and v2 (at index+1)
+               vtmp[1] = _mm_loadu_pd(&info->buf[i][chofs2][MM_EXTRACT_I32(vindex,1)]); // rv buffer: v1 (at index) and v2 (at index+1)
+               vv1 = _mm_unpacklo_pd(vtmp[0], vtmp[1]); // {er v1, rv v1}
+               vv2 = _mm_unpackhi_pd(vtmp[0], vtmp[1]); // {er v2, rv v2}
+               vv1 = MM_FMA_PD(_mm_sub_pd(vv2, vv1), vfp, vv1); // presumably (v2-v1)*frac + v1; MM_FMA_PD is defined elsewhere - confirm
+               // er out
+               dat_er += MM_EXTRACT_F64(vv1, 0); // linear interpolation
+               // er in
+               info->buf[i][chofs0][mindex] = input;
+               input += sprd; // spread
+               // rv save
+               tmp_rv = *info->rv_in[i][chofs0];
+               // rv out       
+               tmp1 = MM_EXTRACT_F64(vv1, 1); // linear interpolation
+       //      sample_filter_stereo(&rv_fc1[i], &flt[0], &flt[1]);     
+               lpf = &rv_fc[i];
+               lpf->db[chofs0] = lpf->dc[0] * tmp1 + lpf->dc[1] * lpf->db[chofs0]; // recursive filter state: dc[0]*in + dc[1]*previous state
+               dat_rv += (info->rv_out[i][chofs0] = tmp1 * flt_dry + lpf->db[chofs0] * flt_wet);
+               // rv in
+               info->buf[i][chofs2][mindex] = input_rv + tmp_rv * rv_feedback[i];
+               if(mindex == 0){ // mirror element 0 into slot [msize] so the two-element loads above stay valid at the wrap point
+                       info->buf[i][chofs0][msize] = info->buf[i][chofs0][0];
+                       info->buf[i][chofs2][msize] = info->buf[i][chofs2][0];
                }
-               // er flt
-               dbL[0] = imuldiv28(dat_er[0], dcL[0]) + imuldiv28(dbL[0], dcL[1]);
-               dbL[1] = imuldiv28(dat_er[1], dcL[0]) + imuldiv28(dbL[1], dcL[1]);
-               dat_er[0] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[0], flt_wet);
-               dat_er[1] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[1], flt_wet);
+               }
+               dbL[chofs0] = dcL[0] * dat_er + dcL[1] * dbL[chofs0]; // er filter state of this channel
+               dat_er = dat_er * flt_dry + dbL[chofs0] * flt_wet; // dry/wet mix of the er sum
                // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
+               bufrd[tmpi2[REV_EX_RD] + chofs0] = dat_er; // same bufrd slot that was read into input_rv above
+               // ap
+               if(ext_reverb_ex_ap_num){ // the ap stage is skipped when ext_reverb_ex_ap_num is 0
+               __m128d vdat = _mm_set_pd(dat_rv, dat_er); // lane 0 = dat_er, lane 1 = dat_rv
+               vai = _mm_cvtepi32_pd(_mm_shuffle_epi32(index2, 0xAA)); // lane 2 of index2 broadcast, then converted to double
+               vvtmp[0] = MM_FMA2_PD(vdat, vec_levelap, vec_fbap, vec_ap_fb); // presumably vdat*levelap + fbap*REV_EX_AP_FB; MM_FMA2_PD is defined elsewhere - confirm
+               info->abuf[chofs0][tmpi2[REV_EX_AP1]] = MM_EXTRACT_F64(vvtmp[0],0); 
+               info->abuf[chofs2][tmpi2[REV_EX_AP1]] = MM_EXTRACT_F64(vvtmp[0],1); 
+               if(tmpi2[REV_EX_AP1] == 0){ // mirror element 0 into slot [asize] for the two-element loads below
+                       info->abuf[chofs0][asize] = info->abuf[chofs0][0];
+                       info->abuf[chofs2][asize] = info->abuf[chofs2][0];
+               }       
+               for (i = 0; i < ext_reverb_ex_ap_num; i++) {
+               __m128d vc, vr, vd, vfp, vv1, vv2, vtmp[2];
+               __m128i vindex;                 
+               // lfo
+               vc = _mm_add_pd(_mm_loadu_pd(&info->acount[i][chofslfo]), _mm_loadu_pd(&info->arate[i][chofslfo])); // acount+arate
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1
+               vc = _mm_sub_pd(vc, _mm_floor_pd(vc)); // count-=floor(count)
+#else
+               vc = _mm_sub_pd(vc, _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc))); // count-=(int)(count); truncation equals floor for non-negative values only
+#endif
+               _mm_storeu_pd(&info->acount[i][chofslfo], vc);
+               vr = _mm_add_pd(vc, _mm_loadu_pd(&info->aphase[i][chofslfo])); // count+phase
+               vd = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr,1)), lookup2_sine_p(MM_EXTRACT_F64(vr,0))); // lookup2_sine_p(count)
+               vd = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][chofslfo]), vd); // depth* sine
+               vfp = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][chofslfo])), vd); // index-delay-depth
+               vfp = _mm_add_pd(vfp, _mm_and_pd(vasize, _mm_cmplt_pd(vfp, _mm_setzero_pd()))); // fp<0 ? fp+size
+               vindex = _mm_cvttpd_epi32(vfp); // truncating convert; equals (int)floor(fp) assuming fp >= 0 after the wrap above
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 floor
+               vfp = _mm_sub_pd(vfp, _mm_floor_pd(vfp)); // fp-floor(fp)
+#else
+               vfp = _mm_sub_pd(vfp, _mm_cvtepi32_pd(vindex)); // fp-vindex
+#endif
+               vtmp[0] = _mm_loadu_pd(&info->abuf[chofs0][MM_EXTRACT_I32(vindex,0)]); // abuf[chofs0]: v1 (at index) and v2 (at index+1)
+               vtmp[1] = _mm_loadu_pd(&info->abuf[chofs2][MM_EXTRACT_I32(vindex,1)]); // abuf[chofs2]: v1 (at index) and v2 (at index+1)
+               vv1 = _mm_unpacklo_pd(vtmp[0], vtmp[1]);
+               vv2 = _mm_unpackhi_pd(vtmp[0], vtmp[1]);
+               vv1 = MM_FMA_PD(_mm_sub_pd(vv2, vv1), vfp, vv1); // linear interpolation
+               vdat = _mm_add_pd(vdat, vv1); // dat_er += (lane 0), dat_rv += (lane 1)
+               }
+               vec_fbap = vdat; // kept as the feedback term for the next iteration (lane 0 = dat_er, lane 1 = dat_rv)
+               dat_er = MM_EXTRACT_F64(vdat,0);
+               dat_rv = MM_EXTRACT_F64(vdat,1);
+               }
                // out
-               hist[0] = imuldiv16(dat_rv[0], levelrv) - imuldiv16(dat_er[0], leveler);
-               hist[1] = imuldiv16(dat_rv[1], levelrv) - imuldiv16(dat_er[1], leveler);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = imuldiv28(dbHL[0], dcH[0]) + imuldiv28(dbHL[1], dcH[1]) + imuldiv28(dbHL[2], dcH[2])
-                       - imuldiv28(dbHL[3], dcH[3]) - imuldiv28(dbHL[4], dcH[4]);
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = imuldiv28(dbHR[0], dcH[0]) + imuldiv28(dbHR[1], dcH[1]) + imuldiv28(dbHR[2], dcH[2])
-                       - imuldiv28(dbHR[3], dcH[3]) - imuldiv28(dbHR[4], dcH[4]);
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];              
-               buf[k] = hist[0]; buf[++k] = hist[1];
+               hist = dat_rv * levelrv + dat_er * leveler;
+               dbH[0] = hist;  
+               hist = dbH[2] = dcH[0] * dbH[0] + dcH[1] * dbH[1] + dcH[2] * dbH[2] + dcH[3] * dbH[3] + dcH[4] * dbH[4]; // filter output over dbH[0..4]; also stored into dbH[2] before the shift below
+               dbH[4] = dbH[3];
+               dbH[3] = dbH[2];
+               dbH[2] = dbH[1];
+               dbH[1] = dbH[0];
+               obuf[k + chofs0] = hist; // only this channel's slot of the pair is written
+       }
+       info->hist[chofs0] = hist; // write the per-channel state back for the next call
+       if(thread_num == 0){ // L
+               _mm_storeu_pd(info->fb_ap1, vec_fbap);
+               _mm_storeu_si128((__m128i *)info->index2, index2);
+       }else if(thread_num == 1){ // R
+               _mm_storeu_pd(info->fb_ap2, vec_fbap);
+               _mm_storeu_si128((__m128i *)info->index2t, index2);
+       }
        }
 }
-
-static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
+#else
+static void do_reverb_ex_mod_chSTMS_thread1(int thread_num, void *info2)
 {
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       int32 *mindex = &info->index2[REV_EX_UNIT], msize = info->size2[REV_EX_UNIT];
-       int32 *aindex = &info->index2[REV_EX_AP1], asize = info->size2[REV_EX_AP1];
+       InfoReverbEX *info;
+       int32 i, k = 0;
+       DATA_T *ibuf, *obuf, *phist, *fb_ap1, *fb_ap2;
+       int32 *indexrd, *mindex, *aindex;
+       FILTER_T *dbH;
+       int32 chofs0, chofs2;
+       DATA_T in[2], *pin[2];
+               
+       if(thread_num >= 2)
+               return;
+       if(!info2)
+               return;
+       info = (InfoReverbEX *)info2;
+       if(!info->init)
+               return;
+       ibuf = info->tibuf;
+       obuf = info->tobuf;
+       if(thread_num == 0){ // L
+               chofs0 = 0;
+               chofs2 = 2;
+               phist = &info->hist[0];
+               indexrd = &info->index2[REV_EX_RD];     
+               mindex = &info->index2[REV_EX_UNIT];
+               aindex = &info->index2[REV_EX_AP1];
+               fb_ap1 = &info->fb_ap1[0];
+               fb_ap2 = &info->fb_ap2[0];
+               dbH = &info->hpf.db[0];
+               pin[0] = &in[0];
+               pin[1] = &in[1];
+       }else if(thread_num == 1){ // R
+               chofs0 = 1;
+               chofs2 = 3;
+               phist = &info->hist[1];
+               indexrd = &info->index2t[REV_EX_RD];
+               mindex = &info->index2t[REV_EX_UNIT];
+               aindex = &info->index2t[REV_EX_AP1];
+               fb_ap1 = &info->fb_ap1[1];
+               fb_ap2 = &info->fb_ap2[1];
+               dbH = &info->hpf.db[5];
+               pin[0] = &in[1];
+               pin[1] = &in[0];
+       }else
+               return; 
+       {
+       DATA_T *bufrd = info->buf2[REV_EX_RD];
+       int32 msize = info->size2[REV_EX_UNIT];
+       int32 asize = info->size2[REV_EX_AP1];
+       int32 sizerd = info->size2[REV_EX_RD];
        FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       int32 leveler = info->leveleri, levelrv = info->levelrvi, feedback = info->feedbacki,
-               *rv_feedback = info->rv_feedbacki, flt_dry = info->flt_dryi, flt_wet = info->flt_weti,
-               *dcH = hpf->dc, *dcL = er_fc->dc, in_level = info->in_leveli, st_sprd = info->st_sprdi;
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  *hist = info->hist, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2], sprd;
-       DATA_T *bufa1 = info->buf2[REV_EX_AP1], *bufa2 = info->buf2[REV_EX_AP2], 
-               *fb_ap1 = info->fb_ap1, *fb_ap2 = info->fb_ap2;
-       int32 levelap = info->levelapi;
-       const int32 apfbi = TIM_FSCALE(REV_EX_AP_FB, 24);
+       FILTER_T *dbL = er_fc->db;
+       FLOAT_T leveler = info->leveler, levelrv = info->levelrv, feedback = info->feedback,
+               *rv_feedback = info->rv_feedback, flt_dry = info->flt_dry, flt_wet = info->flt_wet,
+               *dcH = hpf->dc, *dcL = er_fc->dc;       
+       FLOAT_T st_sprd = info->st_sprd;
+       FLOAT_T levelap = info->levelap;
+       DATA_T tmp[2], tmp1;
+       DATA_T input, sprd, hist = *phist, input_rv, dat_er, dat_rv, tmp_rv;
        FLOAT_T mindexf, aindexf;
-       // CH_STEREO:
-       for (k = 0; k < count; k++)
+
+       for (k = 0; k < info->tcount; k += 2)
        {               
-               input[0] = buf[k]; input[1] = buf[k + 1];
-               sprd = imuldiv24((input[0] - input[1]), st_sprd);
-               input[0] = input[1] = imuldiv24((input[0] + input[1]), in_level);
-               input[0] += imuldiv24(hist[0], feedback); input[1] += imuldiv24(hist[1], feedback);
+#if !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT)
+               in[0] = ibuf[k] * info->in_level; in[1] = ibuf[k + 1] * info->in_level;
+#else
+               in[0] = ibuf[k]; in[1] = ibuf[k + 1];
+#endif
+               sprd = (*pin[0] - *pin[1]) * st_sprd;
+               input = (*pin[0] + *pin[1]) * DIV_MIX_LEVEL;
+               input += hist * feedback;
                // rv delay out
                if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               // unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
+               input_rv = bufrd[*indexrd + chofs0];
+               //unit
+               dat_er = 0; dat_rv = 0;
                if((++*mindex) >= msize) {*mindex = 0;}
                mindexf = *mindex;
                for (i = 0; i < info->unit_num; i++) {
-                       int32 index[4];
-                       FLOAT_T v1[4], v2[4];
-                       FLOAT_T fp1[4], fp2[4]; 
+                       int32 index[2];
+                       DATA_T v1[2], v2[2];
+                       FLOAT_T fp1[2], fp2[2]; 
                        // lfo
-                       info->mcount[i][REV_EX_ER_L1] += info->mrate[i][REV_EX_ER_L1];
-                       info->mcount[i][REV_EX_ER_L1] -= floor(info->mcount[i][REV_EX_ER_L1]);
-                       info->mcount[i][REV_EX_ER_R1] += info->mrate[i][REV_EX_ER_R1];
-                       info->mcount[i][REV_EX_ER_R1] -= floor(info->mcount[i][REV_EX_ER_R1]);
-                       info->mcount[i][REV_EX_RV_L1] += info->mrate[i][REV_EX_RV_L1];
-                       info->mcount[i][REV_EX_RV_L1] -= floor(info->mcount[i][REV_EX_RV_L1]);
-                       info->mcount[i][REV_EX_RV_R1] += info->mrate[i][REV_EX_RV_R1];
-                       info->mcount[i][REV_EX_RV_R1] -= floor(info->mcount[i][REV_EX_RV_R1]);
-                       fp1[0] = mindexf - info->mdelay[i][REV_EX_ER_L1] - info->mdepth[i][REV_EX_ER_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_L1] + info->mphase[i][REV_EX_ER_L1]);        
-                       fp1[1] = mindexf - info->mdelay[i][REV_EX_ER_R1] - info->mdepth[i][REV_EX_ER_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_R1] + info->mphase[i][REV_EX_ER_R1]);        
-                       fp1[2] = mindexf - info->mdelay[i][REV_EX_RV_L1] - info->mdepth[i][REV_EX_RV_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_L1] + info->mphase[i][REV_EX_RV_L1]);        
-                       fp1[3] = mindexf - info->mdelay[i][REV_EX_RV_R1] - info->mdepth[i][REV_EX_RV_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_R1] + info->mphase[i][REV_EX_RV_R1]);        
+                       info->mcount[i][chofs0] += info->mrate[i][chofs0];
+                       info->mcount[i][chofs0] -= floor(info->mcount[i][chofs0]);
+                       info->mcount[i][chofs2] += info->mrate[i][chofs2];
+                       info->mcount[i][chofs2] -= floor(info->mcount[i][chofs2]);
+                       fp1[0] = mindexf - info->mdelay[i][chofs0] - info->mdepth[i][chofs0]
+                               * lookup2_sine_p(info->mcount[i][chofs0] + info->mphase[i][chofs0]);    
+                       fp1[1] = mindexf - info->mdelay[i][chofs2] - info->mdepth[i][chofs2]
+                               * lookup2_sine_p(info->mcount[i][chofs2] + info->mphase[i][chofs2]);    
                        if(fp1[0] < 0) {fp1[0] += msize;}
-                       if(fp1[1] < 0) {fp1[1] += msize;}               
-                       if(fp1[2] < 0) {fp1[2] += msize;}
-                       if(fp1[3] < 0) {fp1[3] += msize;}                       
-                       fp2[0] = floor(fp1[0]); index[0] = fp2[0]; 
-                       fp2[1] = floor(fp1[1]); index[1] = fp2[1];
-                       fp2[2] = floor(fp1[2]); index[2] = fp2[2]; 
-                       fp2[3] = floor(fp1[3]); index[3] = fp2[3]; 
-                       v1[0] = info->buf[i][REV_EX_ER_L1][index[0]]; v2[0] = info->buf[i][REV_EX_ER_L1][index[0] + 1];
-                       v1[1] = info->buf[i][REV_EX_ER_R1][index[1]]; v2[1] = info->buf[i][REV_EX_ER_R1][index[1] + 1];
-                       v1[2] = info->buf[i][REV_EX_RV_L1][index[2]]; v2[2] = info->buf[i][REV_EX_RV_L1][index[2] + 1];
-                       v1[3] = info->buf[i][REV_EX_RV_R1][index[3]]; v2[3] = info->buf[i][REV_EX_RV_R1][index[3] + 1];
+                       if(fp1[1] < 0) {fp1[1] += msize;}                       
+                       fp2[0] = floor(fp1[0]);
+                       fp2[1] = floor(fp1[1]);
+                       index[0] = fp2[0]; 
+                       index[1] = fp2[1];  
+                       v1[0] = info->buf[i][chofs0][index[0]]; v2[0] = info->buf[i][chofs0][index[0] + 1];
+                       v1[1] = info->buf[i][chofs2][index[1]]; v2[1] = info->buf[i][chofs2][index[1] + 1];
                        // er out
-                       dat_er[0] += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
-                       dat_er[1] += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
+                       dat_er += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
                        // er in
-                       info->buf[i][0][*mindex] = input[0]; info->buf[i][1][*mindex] = input[1];
-                       input[0] += sprd; input[1] -= sprd; // spread
+                       info->buf[i][chofs0][*mindex] = input;
+                       input += sprd; // spread
                        // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
+                       tmp_rv = *info->rv_in[i][chofs0];
                        // rv out       
-                       tmp1[0] = v1[2] + (v2[2] - v1[2]) * (fp1[2] - fp2[2]); // linear interpolation
-                       tmp1[1] = v1[3] + (v2[3] - v1[3]) * (fp1[3] - fp2[3]); // linear interpolation
+                       tmp1 = v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
+               //      sample_filter_stereo(&rv_fc1[i], &flt[0], &flt[1]);     
                        lpf = &rv_fc[i];
-                       lpf->db[0] = imuldiv28(tmp1[0], lpf->dc[0]) + imuldiv28(lpf->db[0], lpf->dc[1]);
-                       lpf->db[1] = imuldiv28(tmp1[1], lpf->dc[0]) + imuldiv28(lpf->db[1], lpf->dc[1]);
-                       dat_rv[0] += (info->rv_out[i][0] = imuldiv24(tmp1[0], flt_dry) + imuldiv24(lpf->db[0], flt_wet));
-                       dat_rv[1] += (info->rv_out[i][1] = imuldiv24(tmp1[1], flt_dry) + imuldiv24(lpf->db[1], flt_wet));
+                       lpf->db[chofs0] = lpf->dc[0] * tmp1 + lpf->dc[1] * lpf->db[chofs0];
+                       dat_rv += (info->rv_out[i][chofs0] = tmp1 * flt_dry + lpf->db[chofs0] * flt_wet);
                        // rv in
-                       info->buf[i][2][*mindex] = input_rv[0] + imuldiv24(tmp_rv[0], rv_feedback[i]);
-                       info->buf[i][3][*mindex] = input_rv[1] + imuldiv24(tmp_rv[1], rv_feedback[i]);
-
+                       info->buf[i][chofs2][*mindex] = input_rv + tmp_rv * rv_feedback[i];
                        if(*mindex == 0){
-                               info->buf[i][0][msize] = info->buf[i][0][0];
-                               info->buf[i][1][msize] = info->buf[i][1][0];
-                               info->buf[i][2][msize] = info->buf[i][2][0];
-                               info->buf[i][3][msize] = info->buf[i][3][0];
+                               info->buf[i][chofs0][msize] = info->buf[i][chofs0][0];
+                               info->buf[i][chofs2][msize] = info->buf[i][chofs2][0];
                        }
                }
-               // er flt
-               dbL[0] = imuldiv28(dat_er[0], dcL[0]) + imuldiv28(dbL[0], dcL[1]);
-               dbL[1] = imuldiv28(dat_er[1], dcL[0]) + imuldiv28(dbL[1], dcL[1]);
-               dat_er[0] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[0], flt_wet);
-               dat_er[1] = imuldiv24(dat_er, flt_dry) + imuldiv24(dbL[1], flt_wet);
+               dbL[chofs0] = dcL[0] * dat_er + dcL[1] * dbL[chofs0];
+               dat_er = dat_er * flt_dry + dbL[chofs0] * flt_wet;
                // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
+               bufrd[*indexrd + chofs0] = dat_er;
                // ap
+               if(ext_reverb_ex_ap_num){
                if ((++(*aindex)) >= asize) {*aindex -= asize;}
-               aindexf = aindexf;      
-               info->abuf[REV_EX_ER_L1][*aindex] = imuldiv24(dat_er[0], levelap) + imuldiv24(fb_ap1[0], apfbi); 
-               info->abuf[REV_EX_ER_R1][*aindex] = imuldiv24(dat_er[1], levelap) + imuldiv24(fb_ap1[1], apfbi); 
-               info->abuf[REV_EX_RV_L1][*aindex] = imuldiv24(dat_rv[0], levelap) + imuldiv24(fb_ap2[0], apfbi); 
-               info->abuf[REV_EX_RV_R1][*aindex] = imuldiv24(dat_rv[1], levelap) + imuldiv24(fb_ap2[1], apfbi);
+               aindexf = *aindex;
+               info->abuf[chofs0][*aindex] = dat_er * levelap + *fb_ap1 * REV_EX_AP_FB; 
+               info->abuf[chofs2][*aindex] = dat_rv * levelap + *fb_ap2 * REV_EX_AP_FB; 
                if(*aindex == 0){
-                       info->abuf[0][asize] = info->abuf[0][0];
-                       info->abuf[1][asize] = info->abuf[1][0];
-                       info->abuf[2][asize] = info->abuf[2][0];
-                       info->abuf[3][asize] = info->abuf[3][0];
+                       info->abuf[chofs0][asize] = info->abuf[chofs0][0];
+                       info->abuf[chofs2][asize] = info->abuf[chofs2][0];
                }       
-               for (i = 0; i < REV_EX_AP_MAX; i++) {                   
-                       int32 index[4];
-                       DATA_T v1[4], v2[4];
-                       FLOAT_T fp1[4], fp2[4]; 
+               for (i = 0; i < ext_reverb_ex_ap_num; i++) {
+                       int32 index[2];
+                       DATA_T v1[2], v2[2];
+                       FLOAT_T fp1[2], fp2[2]; 
                        // lfo
-                       info->acount[i][REV_EX_ER_L1] += info->arate[i][REV_EX_ER_L1];
-                       info->acount[i][REV_EX_ER_L1] -= floor(info->acount[i][REV_EX_ER_L1]);
-                       info->acount[i][REV_EX_ER_R1] += info->arate[i][REV_EX_ER_R1];
-                       info->acount[i][REV_EX_ER_R1] -= floor(info->acount[i][REV_EX_ER_R1]);
-                       info->acount[i][REV_EX_RV_L1] += info->arate[i][REV_EX_RV_L1];
-                       info->acount[i][REV_EX_RV_L1] -= floor(info->acount[i][REV_EX_RV_L1]);
-                       info->acount[i][REV_EX_RV_R1] += info->arate[i][REV_EX_RV_R1];
-                       info->acount[i][REV_EX_RV_R1] -= floor(info->acount[i][REV_EX_RV_R1]);
-                       fp1[0] = aindexf - info->adelay[i][REV_EX_ER_L1] - info->adepth[i][REV_EX_ER_L1]
-                               * lookup2_sine_p(info->acount[i][REV_EX_ER_L1] + info->aphase[i][REV_EX_ER_L1]);        
-                       fp1[1] = aindexf - info->adelay[i][REV_EX_ER_R1] - info->adepth[i][REV_EX_ER_R1]
-                               * lookup2_sine_p(info->acount[i][REV_EX_ER_R1] + info->aphase[i][REV_EX_ER_R1]);        
-                       fp1[2] = aindexf - info->adelay[i][REV_EX_RV_L1] - info->adepth[i][REV_EX_RV_L1]
-                               * lookup2_sine_p(info->acount[i][REV_EX_RV_L1] + info->aphase[i][REV_EX_RV_L1]);        
-                       fp1[3] = aindexf - info->adelay[i][REV_EX_RV_R1] - info->adepth[i][REV_EX_RV_R1]
-                               * lookup2_sine_p(info->acount[i][REV_EX_RV_R1] + info->aphase[i][REV_EX_RV_R1]);        
+                       info->acount[i][chofs0] += info->arate[i][chofs0];
+                       info->acount[i][chofs0] -= floor(info->acount[i][chofs0]);
+                       info->acount[i][chofs2] += info->arate[i][chofs2];
+                       info->acount[i][chofs2] -= floor(info->acount[i][chofs2]);
+                       fp1[0] = aindexf - info->adelay[i][chofs0] - info->adepth[i][chofs0]
+                               * lookup2_sine_p(info->acount[i][chofs0] + info->aphase[i][chofs0]);    
+                       fp1[1] = aindexf - info->adelay[i][chofs2] - info->adepth[i][chofs2]
+                               * lookup2_sine_p(info->acount[i][chofs2] + info->aphase[i][chofs2]);    
                        if(fp1[0] < 0) {fp1[0] += asize;}
-                       if(fp1[1] < 0) {fp1[1] += asize;}               
-                       if(fp1[2] < 0) {fp1[2] += asize;}
-                       if(fp1[3] < 0) {fp1[3] += asize;}                       
+                       if(fp1[1] < 0) {fp1[1] += asize;}                       
                        fp2[0] = floor(fp1[0]);
                        fp2[1] = floor(fp1[1]);
-                       fp2[2] = floor(fp1[2]);
-                       fp2[3] = floor(fp1[3]);
                        index[0] = fp2[0]; 
                        index[1] = fp2[1]; 
-                       index[2] = fp2[2]; 
-                       index[3] = fp2[3]; 
-                       v1[0] = info->abuf[REV_EX_ER_L1][index[0]]; v2[0] = info->abuf[REV_EX_ER_L1][index[0] + 1];
-                       v1[1] = info->abuf[REV_EX_ER_R1][index[1]]; v2[1] = info->abuf[REV_EX_ER_R1][index[1] + 1];
-                       v1[2] = info->abuf[REV_EX_RV_L1][index[2]]; v2[2] = info->abuf[REV_EX_RV_L1][index[2] + 1];
-                       v1[3] = info->abuf[REV_EX_RV_R1][index[3]]; v2[3] = info->abuf[REV_EX_RV_R1][index[3] + 1];
-                       dat_er[0] += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
-                       dat_er[1] += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
-                       dat_rv[0] += v1[2] + (v2[2] - v1[2]) * (fp1[2] - fp2[2]); // linear interpolation
-                       dat_rv[1] += v1[3] + (v2[3] - v1[3]) * (fp1[3] - fp2[3]); // linear interpolation
+                       v1[0] = info->abuf[chofs0][index[0]]; v2[0] = info->abuf[chofs0][index[0] + 1];
+                       v1[1] = info->abuf[chofs2][index[1]]; v2[1] = info->abuf[chofs2][index[1] + 1];
+                       dat_er += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
+                       dat_rv += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
+               }
+               *fb_ap1 = dat_er;
+               *fb_ap2 = dat_rv;
                }
-               fb_ap1[0] = dat_er[0]; fb_ap1[1] = dat_er[1];
-               fb_ap2[0] = dat_rv[0]; fb_ap2[1] = dat_rv[1];
                // out
-               hist[0] = imuldiv16(dat_rv[0], levelrv) - imuldiv16(dat_er[0], leveler);
-               hist[1] = imuldiv16(dat_rv[1], levelrv) - imuldiv16(dat_er[1], leveler);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = imuldiv28(dbHL[0], dcH[0]) + imuldiv28(dbHL[1], dcH[1]) + imuldiv28(dbHL[2], dcH[2])
-                       - imuldiv28(dbHL[3], dcH[3]) - imuldiv28(dbHL[4], dcH[4]);
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = imuldiv28(dbHR[0], dcH[0]) + imuldiv28(dbHR[1], dcH[1]) + imuldiv28(dbHR[2], dcH[2])
-                       - imuldiv28(dbHR[3], dcH[3]) - imuldiv28(dbHR[4], dcH[4]);
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];              
-               buf[k] = hist[0]; buf[++k] = hist[1];
+               hist = dat_rv * levelrv + dat_er * leveler;
+               dbH[0] = hist;  
+               hist = dbH[2] = dcH[0] * dbH[0] + dcH[1] * dbH[1] + dcH[2] * dbH[2] + dcH[3] * dbH[3] + dcH[4] * dbH[4];
+               dbH[4] = dbH[3];
+               dbH[3] = dbH[2];
+               dbH[2] = dbH[1];
+               dbH[1] = dbH[0];
+               obuf[k + chofs0] = hist;
+       }
+       *phist = hist;
        }
 }
+#endif // (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)       
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)  
 
-static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
+#if (OPT_MODE == 1) && !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT) /* fixed-point implementation */
+static void do_reverb_ex_mod_chSTMS(DATA_T *buf, int32 count, InfoReverbEX *info)
 {
        int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
        int32 *mindex = &info->index2[REV_EX_UNIT], msize = info->size2[REV_EX_UNIT];
@@ -6807,19 +6132,21 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
        FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
        int32 leveler = info->leveleri, levelrv = info->levelrvi, feedback = info->feedbacki,
                *rv_feedback = info->rv_feedbacki, flt_dry = info->flt_dryi, flt_wet = info->flt_weti,
-               *dcH = hpf->dc, *dcL = er_fc->dc, in_level = info->in_leveli;
+               *dcH = hpf->dc, *dcL = er_fc->dc, in_level = info->in_leveli, st_sprd = info->st_sprdi;
        FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
        DATA_T  *hist = info->hist, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2];
+               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2], sprd;
        DATA_T *bufa1 = info->buf2[REV_EX_AP1], *bufa2 = info->buf2[REV_EX_AP2], 
                *fb_ap1 = info->fb_ap1, *fb_ap2 = info->fb_ap2;
        int32 levelap = info->levelapi;
        const int32 apfbi = TIM_FSCALE(REV_EX_AP_FB, 24);
        FLOAT_T mindexf, aindexf;
-       // CH_MIX_STEREO:
+       // CH_STEREO: CH_MIX_STEREO:
        for (k = 0; k < count; k++)
        {               
-               input[0] = input[1] = imuldiv24((buf[k] + buf[k + 1]), info->in_level);
+               input[0] = buf[k]; input[1] = buf[k + 1];
+               sprd = imuldiv24((input[0] - input[1]), st_sprd);
+               input[0] = input[1] = imuldiv24((input[0] + input[1]), in_level);
                input[0] += imuldiv24(hist[0], feedback); input[1] += imuldiv24(hist[1], feedback);
                // rv delay out
                if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
@@ -6866,6 +6193,7 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                        dat_er[1] += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
                        // er in
                        info->buf[i][0][*mindex] = input[0]; info->buf[i][1][*mindex] = input[1];
+                       input[0] += sprd; input[1] -= sprd; // spread
                        // rv save
                        tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
                        // rv out       
@@ -6895,6 +6223,7 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                // rv delay in
                bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
                // ap
+               if(ext_reverb_ex_ap_num){
                if ((++(*aindex)) >= asize) {*aindex -= asize;}
                aindexf = aindexf;      
                info->abuf[REV_EX_ER_L1][*aindex] = imuldiv24(dat_er[0], levelap) + imuldiv24(fb_ap1[0], apfbi); 
@@ -6906,8 +6235,8 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                        info->abuf[1][asize] = info->abuf[1][0];
                        info->abuf[2][asize] = info->abuf[2][0];
                        info->abuf[3][asize] = info->abuf[3][0];
-               }               
-               for (i = 0; i < REV_EX_AP_MAX; i++) {                   
+               }       
+               for (i = 0; i < ext_reverb_ex_ap_num; i++) {                    
                        int32 index[4];
                        DATA_T v1[4], v2[4];
                        FLOAT_T fp1[4], fp2[4]; 
@@ -6951,9 +6280,10 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                }
                fb_ap1[0] = dat_er[0]; fb_ap1[1] = dat_er[1];
                fb_ap2[0] = dat_rv[0]; fb_ap2[1] = dat_rv[1];
+               }
                // out
-               hist[0] = imuldiv16(dat_rv[0], levelrv) - imuldiv16(dat_er[0], leveler);
-               hist[1] = imuldiv16(dat_rv[1], levelrv) - imuldiv16(dat_er[1], leveler);
+               hist[0] = imuldiv16(dat_rv[0], levelrv) + imuldiv16(dat_er[0], leveler);
+               hist[1] = imuldiv16(dat_rv[1], levelrv) + imuldiv16(dat_er[1], leveler);
                dbHL[0] = hist[0];      
                hist[0] = dbHL[2] = imuldiv28(dbHL[0], dcH[0]) + imuldiv28(dbHL[1], dcH[1]) + imuldiv28(dbHL[2], dcH[2])
                        - imuldiv28(dbHL[3], dcH[3]) - imuldiv28(dbHL[4], dcH[4]);
@@ -6976,7 +6306,7 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
 /*
 SSE2 128bitSIMD : double*2ch, int32*4ch
 */
-static void do_reverb_ex_mod_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
+static void do_reverb_ex_mod_chSTMS(DATA_T *buf, int32 count, InfoReverbEX *info)
 {
        int32 i, k = 0;
        FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
@@ -6990,23 +6320,27 @@ static void do_reverb_ex_mod_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
                vec_leveler = MM_LOAD1_PD(&info->leveler), vec_levelrv = MM_LOAD1_PD(&info->levelrv),
                vec_dry = MM_LOAD1_PD(&info->flt_dry), vec_wet = MM_LOAD1_PD(&info->flt_wet),
                vec_feedback = MM_LOAD1_PD(&info->feedback), vec_hist = _mm_loadu_pd(info->hist);
-       __m128d vec_sp_sprd = _mm_set_pd(-info->st_sprd, info->st_sprd);
        __m128i index2 = _mm_loadu_si128((__m128i *)info->index2), size2 = _mm_loadu_si128((__m128i *)info->size2);
        __m128i add_idx2 = _mm_set_epi32(1, 1, 2, 1);
+       __m128d vec_ap_fb = _mm_set1_pd(REV_EX_AP_FB), vec_levelap = MM_LOAD1_PD(&info->levelap), 
+               vec_fbap1 = _mm_loadu_pd(info->fb_ap1), vec_fbap2 = _mm_loadu_pd(info->fb_ap2);
        __m128d vmsize = _mm_set1_pd(info->size2[REV_EX_UNIT]), vmi;
+       __m128d vasize = _mm_set1_pd(info->size2[REV_EX_AP1]), vai;
+       __m128d vtmp[2];
        int32 msize = info->size2[REV_EX_UNIT], mindex;
-       // CH_STEREO:
+       __m128d vec_sp_sprd = _mm_set_pd(-info->st_sprd, info->st_sprd);
+       // CH_STEREO: CH_MIX_STEREO:
        for (k = 0; k < count; k += 2)
        {               
-               int32 tmpi0;
+               ALIGN int32 tmpi2[4];
                vec_sprd = _mm_mul_pd(_mm_set1_pd(buf[k] - buf[k + 1]), vec_sp_sprd);
                vec_input_er = _mm_add_pd(_mm_set1_pd((buf[k] + buf[k + 1]) * DIV_MIX_LEVEL), _mm_mul_pd(vec_hist, vec_feedback));
                // index2 (rv delay, ap
                index2 = _mm_add_epi32(index2, add_idx2);
                index2 = _mm_and_si128(index2, _mm_cmplt_epi32(index2, size2));
-               tmpi0 = MM_EXTRACT_EPI32(index2, REV_EX_RD);
+               _mm_store_si128((__m128i *)&tmpi2, index2);
                // rv delay out
-               vec_input_rv = _mm_load_pd(&bufrd[tmpi0]); // REV_EX_RD
+               vec_input_rv = _mm_load_pd(&bufrd[tmpi2[REV_EX_RD]]);
                // unit
                vec_mixer = _mm_setzero_pd(); vec_mixrv = _mm_setzero_pd();
                vmi = _mm_cvtepi32_pd(_mm_shuffle_epi32(index2, 0x0));
@@ -7086,9 +6420,71 @@ static void do_reverb_ex_mod_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
                vec_dbL = MM_FMA2_PD(vec_dcL0, vec_mixer, vec_dcL1, vec_dbL);
                vec_mixer = MM_FMA2_PD(vec_mixer, vec_dry, vec_dbL, vec_wet);   
                // rv delay in
-               _mm_store_pd(&bufrd[tmpi0], vec_mixer); 
+               _mm_store_pd(&bufrd[tmpi2[REV_EX_RD]], vec_mixer);
+               // ap
+               if(ext_reverb_ex_ap_num){
+               vai = _mm_cvtepi32_pd(_mm_shuffle_epi32(index2, 0xAA));
+               vtmp[0] = MM_FMA2_PD(vec_mixer, vec_levelap, vec_fbap1, vec_ap_fb);
+               vtmp[1] = MM_FMA2_PD(vec_mixrv, vec_levelap, vec_fbap2, vec_ap_fb);
+               _mm_store_sd(&info->abuf[REV_EX_ER_L1][tmpi2[REV_EX_AP1]], vtmp[0]); 
+               _mm_store_sd(&info->abuf[REV_EX_ER_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[0], vtmp[0], 0x3)); 
+               _mm_store_sd(&info->abuf[REV_EX_RV_L1][tmpi2[REV_EX_AP1]], vtmp[1]); 
+               _mm_store_sd(&info->abuf[REV_EX_RV_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[1], vtmp[0], 0x3)); 
+               if(tmpi2[REV_EX_AP1] == 0){
+                       info->abuf[0][info->size2[REV_EX_AP1]] = info->abuf[0][0];
+                       info->abuf[1][info->size2[REV_EX_AP1]] = info->abuf[1][0];
+                       info->abuf[2][info->size2[REV_EX_AP1]] = info->abuf[2][0];
+                       info->abuf[3][info->size2[REV_EX_AP1]] = info->abuf[3][0];
+               }               
+               for (i = 0; i < ext_reverb_ex_ap_num; i++) {                            
+               __m128d vc[2], vr[2], vd[2], vfp[2], vtmp[4], vv1[2], vv2[2];
+               __m128i vindex[2];
+               // lfo
+               vc[0] = _mm_add_pd(_mm_loadu_pd(&info->acount[i][REV_EX_ER_L1]), _mm_loadu_pd(&info->arate[i][REV_EX_ER_L1])); // mcount+mrate
+               vc[1] = _mm_add_pd(_mm_loadu_pd(&info->acount[i][REV_EX_RV_L1]), _mm_loadu_pd(&info->arate[i][REV_EX_RV_L1])); // mcount+mrate
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1
+               vc[0] = _mm_sub_pd(vc[0], _mm_floor_pd(vc[0])); // count-=floor(count)
+               vc[1] = _mm_sub_pd(vc[1], _mm_floor_pd(vc[1])); // count-=floor(count)
+#else
+               vc[0] = _mm_sub_pd(vc[0], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[0]))); // count-=floor(count) +のみ
+               vc[1] = _mm_sub_pd(vc[1], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[1]))); // count-=floor(count) +のみ
+#endif
+               _mm_storeu_pd(&info->acount[i][REV_EX_ER_L1], vc[0]);
+               _mm_storeu_pd(&info->acount[i][REV_EX_RV_L1], vc[1]);
+               vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->aphase[i][REV_EX_ER_L1])); // count+phase
+               vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->aphase[i][REV_EX_RV_L1])); // count+phase
+               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(count)
+               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(count)  
+               vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_ER_L1]), vd[0]); // depth* sine
+               vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_RV_L1]), vd[1]); // depth* sine
+               vfp[0] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_ER_L1])), vd[0]); // index-delay-depth
+               vfp[1] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_RV_L1])), vd[1]); // index-delay-depth         
+               vfp[0] = _mm_add_pd(vfp[0], _mm_and_pd(vasize, _mm_cmplt_pd(vfp[0], _mm_setzero_pd())));        // fp<0 ? fp+size       
+               vfp[1] = _mm_add_pd(vfp[1], _mm_and_pd(vasize, _mm_cmplt_pd(vfp[1], _mm_setzero_pd())));        // fp<0 ? fp+size       
+               vindex[0] = _mm_cvttpd_epi32(vfp[0]); // (int)floor(fp)
+               vindex[1] = _mm_cvttpd_epi32(vfp[1]); // (int)floor(fp)
+#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 floor
+               vfp[0] = _mm_sub_pd(vfp[0], _mm_floor_pd(vfp[0])); // fp-floor(fp)
+               vfp[1] = _mm_sub_pd(vfp[1], _mm_floor_pd(vfp[1])); // fp-floor(fp)
+#else
+               vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
+               vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
+#endif
+               vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
+               vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
+               vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
+               vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
+               vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
+               vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
+               vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
+               vv2[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x3);
+               vec_mixer = _mm_add_pd(vec_mixer, MM_FMA_PD(_mm_sub_pd(vv2[0], vv1[0]), vfp[0], vv1[0]));
+               vec_mixrv = _mm_add_pd(vec_mixrv, MM_FMA_PD(_mm_sub_pd(vv2[1], vv1[1]), vfp[1], vv1[1]));
+               }
+               vec_fbap1 = vec_mixer; vec_fbap2 = vec_mixrv;
+               }
                // out
-               vec_hist = _mm_sub_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
+               vec_hist = _mm_add_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
                vec_dbH0 = vec_hist;
                vec_hist = vec_dbH2 = MM_FMA5_PD(vec_dcH0, vec_dbH0, vec_dcH1, vec_dbH1, vec_dcH2, vec_dbH2, vec_dcH3, vec_dbH3, vec_dcH4, vec_dbH4);
                vec_dbH4 = vec_dbH3; vec_dbH3 = vec_dbH2; vec_dbH2 = vec_dbH1; vec_dbH1 = vec_dbH0;
@@ -7098,751 +6494,15 @@ static void do_reverb_ex_mod_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
        _mm_storeu_pd(&hpf->db[0], vec_dbH0); _mm_storeu_pd(&hpf->db[2], vec_dbH1); _mm_storeu_pd(&hpf->db[4], vec_dbH2);
        _mm_storeu_pd(&hpf->db[6], vec_dbH3); _mm_storeu_pd(&hpf->db[8], vec_dbH4);
        _mm_storeu_si128((__m128i *)info->index2, index2);
+       _mm_storeu_pd(info->fb_ap1, vec_fbap1); _mm_storeu_pd(info->fb_ap2, vec_fbap2);
 }
 
-static void do_reverb_ex_mod_chMS(DATA_T *buf, int32 count, InfoReverbEX *info)
+#else /* floating-point implementation */
+static void do_reverb_ex_mod_chSTMS(DATA_T *buf, int32 count, InfoReverbEX *info)
 {
-       int32 i, k = 0;
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       DATA_T *bufrd = info->buf2[REV_EX_RD];
-       __m128d vec_dcH0 = MM_LOAD1_PD(&hpf->dc[0]), vec_dcH1 = MM_LOAD1_PD(&hpf->dc[1]), vec_dcH2 = MM_LOAD1_PD(&hpf->dc[2]), 
-               vec_dcH3 = MM_LOAD1_PD(&hpf->dc[3]), vec_dcH4 = MM_LOAD1_PD(&hpf->dc[4]),
-               vec_dcL0 = MM_LOAD1_PD(&er_fc->dc[0]), vec_dcL1 = MM_LOAD1_PD(&er_fc->dc[1]);
-       __m128d vec_dbH0 = _mm_loadu_pd(&hpf->db[0]), vec_dbH1 = _mm_loadu_pd(&hpf->db[2]), vec_dbH2 = _mm_loadu_pd(&hpf->db[4]), 
-               vec_dbH3 = _mm_loadu_pd(&hpf->db[6]), vec_dbH4 = _mm_loadu_pd(&hpf->db[8]), vec_dbL = _mm_loadu_pd(er_fc->db);
-       __m128d vec_tmp1, vec_input_er, vec_input_rv, vec_mixer, vec_mixap, vec_mixrv, vec_tmp_rv, vec_db, 
-               vec_leveler = MM_LOAD1_PD(&info->leveler), vec_levelrv = MM_LOAD1_PD(&info->levelrv),
-               vec_dry = MM_LOAD1_PD(&info->flt_dry), vec_wet = MM_LOAD1_PD(&info->flt_wet),
-               vec_feedback = MM_LOAD1_PD(&info->feedback), vec_hist = _mm_loadu_pd(info->hist),
-               vec_mix_level = _mm_set1_pd(DIV_MIX_LEVEL);
-       __m128i index2 = _mm_loadu_si128((__m128i *)info->index2), size2 = _mm_loadu_si128((__m128i *)info->size2);
-       __m128i add_idx2 = _mm_set_epi32(1, 1, 2, 1);
-       __m128d vmsize = _mm_set1_pd(info->size2[REV_EX_UNIT]), vmi;
-       int32 msize = info->size2[REV_EX_UNIT], mindex;
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k += 2)
-       {               
-               int32 tmpi0;
-               vec_input_er = _mm_add_pd(_mm_set1_pd((buf[k] + buf[k + 1]) * DIV_MIX_LEVEL), _mm_mul_pd(vec_hist, vec_feedback));
-               // index2 (rv delay, ap
-               index2 = _mm_add_epi32(index2, add_idx2);
-               index2 = _mm_and_si128(index2, _mm_cmplt_epi32(index2, size2));
-               tmpi0 = MM_EXTRACT_EPI32(index2, REV_EX_RD);
-               // rv delay out
-               vec_input_rv = _mm_load_pd(&bufrd[tmpi0]); // REV_EX_RD
-               // unit
-               vec_mixer = _mm_setzero_pd(); vec_mixrv = _mm_setzero_pd();
-               vmi = _mm_cvtepi32_pd(_mm_shuffle_epi32(index2, 0x0));
-               mindex = _mm_cvtsi128_si32(index2);
-               for (i = 0; i < info->unit_num; i++) {
-               __m128d vc[2], vr[2], vd[2], vfp[2], vtmp[4], vv1[2], vv2[2];
-               __m128i vindex[2];
-               // lfo
-               vc[0] = _mm_add_pd(_mm_loadu_pd(&info->mcount[i][REV_EX_ER_L1]), _mm_loadu_pd(&info->mrate[i][REV_EX_ER_L1])); // mcount+mrate
-               vc[1] = _mm_add_pd(_mm_loadu_pd(&info->mcount[i][REV_EX_RV_L1]), _mm_loadu_pd(&info->mrate[i][REV_EX_RV_L1])); // mcount+mrate
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1
-               vc[0] = _mm_sub_pd(vc[0], _mm_floor_pd(vc[0])); // mcount-=floor(mcount)
-               vc[1] = _mm_sub_pd(vc[1], _mm_floor_pd(vc[1])); // mcount-=floor(mcount)
-#else
-               vc[0] = _mm_sub_pd(vc[0], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[0]))); // mcount-=(int)(mcount) +のみ
-               vc[1] = _mm_sub_pd(vc[1], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[1]))); // mcount-=(int)(mcount) +のみ
-#endif
-               _mm_storeu_pd(&info->mcount[i][REV_EX_ER_L1], vc[0]);
-               _mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
-               vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
-               vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
-               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
-               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)     
-               vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
-               vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
-               vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
-               vfp[1] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_RV_L1])), vd[1]); // mindex-mdelay-mdepth              
-               vfp[0] = _mm_add_pd(vfp[0], _mm_and_pd(vmsize, _mm_cmplt_pd(vfp[0], _mm_setzero_pd())));        // fp<0 ? fp+msize      
-               vfp[1] = _mm_add_pd(vfp[1], _mm_and_pd(vmsize, _mm_cmplt_pd(vfp[1], _mm_setzero_pd())));        // fp<0 ? fp+msize      
-               vindex[0] = _mm_cvttpd_epi32(vfp[0]); // (int)floor(fp)
-               vindex[1] = _mm_cvttpd_epi32(vfp[1]); // (int)floor(fp)
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 floor
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_floor_pd(vfp[0])); // fp-floor(fp)
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_floor_pd(vfp[1])); // fp-floor(fp)
-#else
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
-#endif
-               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2
-               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
-               vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
-               vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
-               vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
-               vv2[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x3);
-               vv1[0] = MM_FMA_PD(_mm_sub_pd(vv2[0], vv1[0]), vfp[0], vv1[0]);
-               vv1[1] = MM_FMA_PD(_mm_sub_pd(vv2[1], vv1[1]), vfp[1], vv1[1]);
-               // er out
-               vec_mixer = _mm_add_pd(vec_mixer, vv1[0]);
-               _mm_store_sd(&(info->buf[i][0][mindex]), vec_input_er); 
-               _mm_store_sd(&(info->buf[i][1][mindex]), _mm_shuffle_pd(vec_input_er, vec_input_er, 0x1));
-               // rv save
-               vec_tmp_rv = _mm_set_pd(*info->rv_in[i][1], *info->rv_in[i][0]);
-               // rv out
-               vec_tmp1 = vv1[1];
-               lpf = &rv_fc[i];
-               vec_db = _mm_loadu_pd(lpf->db);
-               vec_db = MM_FMA2_PD(MM_LOAD1_PD(&lpf->dc[0]), vec_tmp1, MM_LOAD1_PD(&lpf->dc[1]), vec_db);
-               _mm_storeu_pd(lpf->db, vec_db);
-               vec_tmp1 = MM_FMA2_PD(vec_tmp1, vec_dry, vec_db, vec_wet);
-               vec_mixrv = _mm_add_pd(vec_mixrv, vec_tmp1);
-               _mm_storeu_pd(info->rv_out[i], vec_tmp1);
-               // rv in
-               vec_tmp1 = _mm_add_pd(vec_input_rv, _mm_mul_pd(vec_tmp_rv, MM_LOAD1_PD(&info->rv_feedback[i])));
-               _mm_store_sd(&(info->buf[i][2][mindex]), vec_tmp1);
-               _mm_store_sd(&(info->buf[i][3][mindex]), _mm_shuffle_pd(vec_tmp1, vec_tmp1, 0x1));
-               if(mindex == 0){
-                       info->buf[i][0][msize] = info->buf[i][0][0];
-                       info->buf[i][1][msize] = info->buf[i][1][0];
-                       info->buf[i][2][msize] = info->buf[i][2][0];
-                       info->buf[i][3][msize] = info->buf[i][3][0];
-               }
-               }
-               // er flt
-               vec_dbL = MM_FMA2_PD(vec_dcL0, vec_mixer, vec_dcL1, vec_dbL);
-               vec_mixer = MM_FMA2_PD(vec_mixer, vec_dry, vec_dbL, vec_wet);   
-               // rv delay in
-               _mm_store_pd(&bufrd[tmpi0], vec_mixer); 
-               // out
-               vec_hist = _mm_sub_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
-               vec_dbH0 = vec_hist;
-               vec_hist = vec_dbH2 = MM_FMA5_PD(vec_dcH0, vec_dbH0, vec_dcH1, vec_dbH1, vec_dcH2, vec_dbH2, vec_dcH3, vec_dbH3, vec_dcH4, vec_dbH4);
-               vec_dbH4 = vec_dbH3; vec_dbH3 = vec_dbH2; vec_dbH2 = vec_dbH1; vec_dbH1 = vec_dbH0;
-               _mm_store_pd(&buf[k], vec_hist);
-       }
-       _mm_storeu_pd(info->hist, vec_hist); _mm_storeu_pd(er_fc->db, vec_dbL);
-       _mm_storeu_pd(&hpf->db[0], vec_dbH0); _mm_storeu_pd(&hpf->db[2], vec_dbH1); _mm_storeu_pd(&hpf->db[4], vec_dbH2);
-       _mm_storeu_pd(&hpf->db[6], vec_dbH3); _mm_storeu_pd(&hpf->db[8], vec_dbH4);
-       _mm_storeu_si128((__m128i *)info->index2, index2);
-}
-
-static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info) // SSE intrinsics path: modulated reverb (er/rv units plus an extra modulated "ap" stage) applied in place to buf
-{ // buf: in/out samples, two consumed and two written per iteration (buf[k], buf[k+1]); count: upper bound on the element index (presumably even); info: reverb state, read at entry and written back at exit
-       int32 i, k = 0;
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       DATA_T *bufrd = info->buf2[REV_EX_RD];
-       __m128d vec_dcH0 = MM_LOAD1_PD(&hpf->dc[0]), vec_dcH1 = MM_LOAD1_PD(&hpf->dc[1]), vec_dcH2 = MM_LOAD1_PD(&hpf->dc[2]), 
-               vec_dcH3 = MM_LOAD1_PD(&hpf->dc[3]), vec_dcH4 = MM_LOAD1_PD(&hpf->dc[4]),
-               vec_dcL0 = MM_LOAD1_PD(&er_fc->dc[0]), vec_dcL1 = MM_LOAD1_PD(&er_fc->dc[1]);
-       __m128d vec_dbH0 = _mm_loadu_pd(&hpf->db[0]), vec_dbH1 = _mm_loadu_pd(&hpf->db[2]), vec_dbH2 = _mm_loadu_pd(&hpf->db[4]), 
-               vec_dbH3 = _mm_loadu_pd(&hpf->db[6]), vec_dbH4 = _mm_loadu_pd(&hpf->db[8]), vec_dbL = _mm_loadu_pd(er_fc->db);
-       __m128d vec_tmp1, vec_input_er, vec_input_rv, vec_mixer, vec_mixap, vec_mixrv, vec_tmp_rv, vec_db, vec_sprd, // NOTE(review): vec_mixap is declared but never used in this function
-               vec_leveler = MM_LOAD1_PD(&info->leveler), vec_levelrv = MM_LOAD1_PD(&info->levelrv),
-               vec_dry = MM_LOAD1_PD(&info->flt_dry), vec_wet = MM_LOAD1_PD(&info->flt_wet),
-               vec_feedback = MM_LOAD1_PD(&info->feedback), vec_hist = _mm_loadu_pd(info->hist);
-       __m128d vec_sp_sprd = _mm_set_pd(-info->st_sprd, info->st_sprd); // low lane = +st_sprd, high lane = -st_sprd
-       __m128i index2 = _mm_loadu_si128((__m128i *)info->index2), size2 = _mm_loadu_si128((__m128i *)info->size2);
-       __m128i add_idx2 = _mm_set_epi32(1, 1, 2, 1); // per-iteration index steps, lanes 0..3 = 1, 2, 1, 1
-       __m128d vec_ap_fb = _mm_set1_pd(REV_EX_AP_FB), vec_levelap = MM_LOAD1_PD(&info->levelap), 
-               vec_fbap1 = _mm_loadu_pd(info->fb_ap1), vec_fbap2 = _mm_loadu_pd(info->fb_ap2);
-       __m128d vmsize = _mm_set1_pd(info->size2[REV_EX_UNIT]), vmi;
-       __m128d vasize = _mm_set1_pd(info->size2[REV_EX_AP1]), vai;
-       __m128d vtmp[2]; // used only by the "ap" input stage; both inner loops declare their own vtmp[4] that shadows this one
-       int32 msize = info->size2[REV_EX_UNIT], mindex;
-       // CH_STEREO:
-       for (k = 0; k < count; k += 2)
-       {               
-               ALIGN int32 tmpi2[4]; // scratch copy of index2 for scalar indexing
-               vec_sprd = _mm_mul_pd(_mm_set1_pd(buf[k] - buf[k + 1]), vec_sp_sprd); // channel difference scaled by +/-st_sprd
-               vec_input_er = _mm_add_pd(_mm_set1_pd((buf[k] + buf[k + 1]) * DIV_MIX_LEVEL), _mm_mul_pd(vec_hist, vec_feedback)); // scaled channel sum in both lanes, plus previous output (vec_hist) times feedback
-               // index2 (rv delay, ap
-               index2 = _mm_add_epi32(index2, add_idx2);
-               index2 = _mm_and_si128(index2, _mm_cmplt_epi32(index2, size2)); // wrap: any lane with index >= size is reset to 0
-               _mm_store_si128((__m128i *)&tmpi2, index2);
-               // rv delay out
-               vec_input_rv = _mm_load_pd(&bufrd[tmpi2[REV_EX_RD]]); // aligned load: assumes this element of bufrd is 16-byte aligned - TODO confirm
-               // unit
-               vec_mixer = _mm_setzero_pd(); vec_mixrv = _mm_setzero_pd();
-               vmi = _mm_cvtepi32_pd(_mm_shuffle_epi32(index2, 0x0)); // lane 0 of index2 broadcast, as double
-               mindex = _mm_cvtsi128_si32(index2); // lane 0 of index2 as int (presumably the REV_EX_UNIT slot - confirm)
-               for (i = 0; i < info->unit_num; i++) {
-               __m128d vc[2], vr[2], vd[2], vfp[2], vtmp[4], vv1[2], vv2[2];
-               __m128i vindex[2];
-               // lfo
-               vc[0] = _mm_add_pd(_mm_loadu_pd(&info->mcount[i][REV_EX_ER_L1]), _mm_loadu_pd(&info->mrate[i][REV_EX_ER_L1])); // mcount+mrate
-               vc[1] = _mm_add_pd(_mm_loadu_pd(&info->mcount[i][REV_EX_RV_L1]), _mm_loadu_pd(&info->mrate[i][REV_EX_RV_L1])); // mcount+mrate
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1
-               vc[0] = _mm_sub_pd(vc[0], _mm_floor_pd(vc[0])); // mcount-=floor(mcount)
-               vc[1] = _mm_sub_pd(vc[1], _mm_floor_pd(vc[1])); // mcount-=floor(mcount)
-#else
-               vc[0] = _mm_sub_pd(vc[0], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[0]))); // mcount-=(int)(mcount) (equals floor for non-negative values only)
-               vc[1] = _mm_sub_pd(vc[1], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[1]))); // mcount-=(int)(mcount) (equals floor for non-negative values only)
-#endif
-               _mm_storeu_pd(&info->mcount[i][REV_EX_ER_L1], vc[0]);
-               _mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
-               vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
-               vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
-               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
-               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)     
-               vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
-               vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
-               vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
-               vfp[1] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_RV_L1])), vd[1]); // mindex-mdelay-mdepth              
-               vfp[0] = _mm_add_pd(vfp[0], _mm_and_pd(vmsize, _mm_cmplt_pd(vfp[0], _mm_setzero_pd())));        // fp<0 ? fp+msize      
-               vfp[1] = _mm_add_pd(vfp[1], _mm_and_pd(vmsize, _mm_cmplt_pd(vfp[1], _mm_setzero_pd())));        // fp<0 ? fp+msize      
-               vindex[0] = _mm_cvttpd_epi32(vfp[0]); // (int)floor(fp)
-               vindex[1] = _mm_cvttpd_epi32(vfp[1]); // (int)floor(fp)
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 floor
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_floor_pd(vfp[0])); // fp-floor(fp)
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_floor_pd(vfp[1])); // fp-floor(fp)
-#else
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
-#endif
-               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2: samples at [index] and [index+1]
-               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
-               vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0); // gather the two [index] samples (v1)
-               vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
-               vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3); // gather the two [index+1] samples (v2)
-               vv2[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x3);
-               vv1[0] = MM_FMA_PD(_mm_sub_pd(vv2[0], vv1[0]), vfp[0], vv1[0]); // presumably (v2-v1)*frac + v1, i.e. linear interpolation - confirm MM_FMA_PD operand order
-               vv1[1] = MM_FMA_PD(_mm_sub_pd(vv2[1], vv1[1]), vfp[1], vv1[1]);
-               // er out
-               vec_mixer = _mm_add_pd(vec_mixer, vv1[0]);
-               _mm_store_sd(&(info->buf[i][0][mindex]), vec_input_er); // er in: low lane goes to buffer 0
-               _mm_store_sd(&(info->buf[i][1][mindex]), _mm_shuffle_pd(vec_input_er, vec_input_er, 0x1)); // er in: high lane goes to buffer 1
-               vec_input_er = _mm_add_pd(vec_input_er, vec_sprd); // spread
-               // rv save
-               vec_tmp_rv = _mm_set_pd(*info->rv_in[i][1], *info->rv_in[i][0]);
-               // rv out
-               vec_tmp1 = vv1[1];
-               lpf = &rv_fc[i];
-               vec_db = _mm_loadu_pd(lpf->db);
-               vec_db = MM_FMA2_PD(MM_LOAD1_PD(&lpf->dc[0]), vec_tmp1, MM_LOAD1_PD(&lpf->dc[1]), vec_db);
-               _mm_storeu_pd(lpf->db, vec_db);
-               vec_tmp1 = MM_FMA2_PD(vec_tmp1, vec_dry, vec_db, vec_wet);
-               vec_mixrv = _mm_add_pd(vec_mixrv, vec_tmp1);
-               _mm_storeu_pd(info->rv_out[i], vec_tmp1);
-               // rv in
-               vec_tmp1 = _mm_add_pd(vec_input_rv, _mm_mul_pd(vec_tmp_rv, MM_LOAD1_PD(&info->rv_feedback[i])));
-               _mm_store_sd(&(info->buf[i][2][mindex]), vec_tmp1);
-               _mm_store_sd(&(info->buf[i][3][mindex]), _mm_shuffle_pd(vec_tmp1, vec_tmp1, 0x1));
-               if(mindex == 0){ // mirror element [0] into slot [msize] so the two-element loads above can read [index+1] at the wrap point
-                       info->buf[i][0][msize] = info->buf[i][0][0];
-                       info->buf[i][1][msize] = info->buf[i][1][0];
-                       info->buf[i][2][msize] = info->buf[i][2][0];
-                       info->buf[i][3][msize] = info->buf[i][3][0];
-               }
-               }
-               // er flt
-               vec_dbL = MM_FMA2_PD(vec_dcL0, vec_mixer, vec_dcL1, vec_dbL);
-               vec_mixer = MM_FMA2_PD(vec_mixer, vec_dry, vec_dbL, vec_wet);   
-               // rv delay in
-               _mm_store_pd(&bufrd[tmpi2[REV_EX_RD]], vec_mixer);
-               // ap
-               vai = _mm_cvtepi32_pd(_mm_shuffle_epi32(index2, 0xAA)); // lane 2 of index2 broadcast, as double
-               vtmp[0] = MM_FMA2_PD(vec_mixer, vec_levelap, vec_fbap1, vec_ap_fb);
-               vtmp[1] = MM_FMA2_PD(vec_mixrv, vec_levelap, vec_fbap2, vec_ap_fb);
-               _mm_store_sd(&info->abuf[REV_EX_ER_L1][tmpi2[REV_EX_AP1]], vtmp[0]); 
-               _mm_store_sd(&info->abuf[REV_EX_ER_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[0], vtmp[0], 0x3)); // stores the high lane of vtmp[0]
-               _mm_store_sd(&info->abuf[REV_EX_RV_L1][tmpi2[REV_EX_AP1]], vtmp[1]); 
-               _mm_store_sd(&info->abuf[REV_EX_RV_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[1], vtmp[0], 0x3)); // NOTE(review): second operand is vtmp[0], but only the low result lane (high lane of vtmp[1]) is stored
-               if(tmpi2[REV_EX_AP1] == 0){ // same [size] = [0] mirroring as for the unit buffers
-                       info->abuf[0][info->size2[REV_EX_AP1]] = info->abuf[0][0];
-                       info->abuf[1][info->size2[REV_EX_AP1]] = info->abuf[1][0];
-                       info->abuf[2][info->size2[REV_EX_AP1]] = info->abuf[2][0];
-                       info->abuf[3][info->size2[REV_EX_AP1]] = info->abuf[3][0];
-               }               
-               for (i = 0; i < REV_EX_AP_MAX; i++) {                           
-               __m128d vc[2], vr[2], vd[2], vfp[2], vtmp[4], vv1[2], vv2[2];
-               __m128i vindex[2];
-               // lfo
-               vc[0] = _mm_add_pd(_mm_loadu_pd(&info->acount[i][REV_EX_ER_L1]), _mm_loadu_pd(&info->arate[i][REV_EX_ER_L1])); // acount+arate
-               vc[1] = _mm_add_pd(_mm_loadu_pd(&info->acount[i][REV_EX_RV_L1]), _mm_loadu_pd(&info->arate[i][REV_EX_RV_L1])); // acount+arate
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1
-               vc[0] = _mm_sub_pd(vc[0], _mm_floor_pd(vc[0])); // count-=floor(count)
-               vc[1] = _mm_sub_pd(vc[1], _mm_floor_pd(vc[1])); // count-=floor(count)
-#else
-               vc[0] = _mm_sub_pd(vc[0], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[0]))); // count-=(int)(count) (equals floor for non-negative values only)
-               vc[1] = _mm_sub_pd(vc[1], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[1]))); // count-=(int)(count) (equals floor for non-negative values only)
-#endif
-               _mm_storeu_pd(&info->acount[i][REV_EX_ER_L1], vc[0]);
-               _mm_storeu_pd(&info->acount[i][REV_EX_RV_L1], vc[1]);
-               vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->aphase[i][REV_EX_ER_L1])); // count+phase
-               vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->aphase[i][REV_EX_RV_L1])); // count+phase
-               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(count)
-               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(count)
-               vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_ER_L1]), vd[0]); // depth* sine
-               vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_RV_L1]), vd[1]); // depth* sine
-               vfp[0] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_ER_L1])), vd[0]); // index-delay-depth
-               vfp[1] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_RV_L1])), vd[1]); // index-delay-depth         
-               vfp[0] = _mm_add_pd(vfp[0], _mm_and_pd(vasize, _mm_cmplt_pd(vfp[0], _mm_setzero_pd())));        // fp<0 ? fp+size       
-               vfp[1] = _mm_add_pd(vfp[1], _mm_and_pd(vasize, _mm_cmplt_pd(vfp[1], _mm_setzero_pd())));        // fp<0 ? fp+size       
-               vindex[0] = _mm_cvttpd_epi32(vfp[0]); // (int)floor(fp)
-               vindex[1] = _mm_cvttpd_epi32(vfp[1]); // (int)floor(fp)
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 floor
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_floor_pd(vfp[0])); // fp-floor(fp)
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_floor_pd(vfp[1])); // fp-floor(fp)
-#else
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
-#endif
-               vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2: samples at [index] and [index+1]
-               vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
-               vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
-               vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
-               vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
-               vv2[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x3);
-               vec_mixer = _mm_add_pd(vec_mixer, MM_FMA_PD(_mm_sub_pd(vv2[0], vv1[0]), vfp[0], vv1[0])); // accumulate each interpolated tap onto the er mix
-               vec_mixrv = _mm_add_pd(vec_mixrv, MM_FMA_PD(_mm_sub_pd(vv2[1], vv1[1]), vfp[1], vv1[1])); // accumulate each interpolated tap onto the rv mix
-               }
-               vec_fbap1 = vec_mixer; vec_fbap2 = vec_mixrv; // carried into the "ap" input computation of the next iteration
-               // out
-               vec_hist = _mm_sub_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
-               vec_dbH0 = vec_hist;
-               vec_hist = vec_dbH2 = MM_FMA5_PD(vec_dcH0, vec_dbH0, vec_dcH1, vec_dbH1, vec_dcH2, vec_dbH2, vec_dcH3, vec_dbH3, vec_dcH4, vec_dbH4);
-               vec_dbH4 = vec_dbH3; vec_dbH3 = vec_dbH2; vec_dbH2 = vec_dbH1; vec_dbH1 = vec_dbH0; // shift the hpf history; statement order matters here
-               _mm_store_pd(&buf[k], vec_hist); // aligned store: assumes buf is 16-byte aligned - TODO confirm
-       }
-       _mm_storeu_pd(info->hist, vec_hist); _mm_storeu_pd(er_fc->db, vec_dbL); // write the register-held state back into info for the next call
-       _mm_storeu_pd(&hpf->db[0], vec_dbH0); _mm_storeu_pd(&hpf->db[2], vec_dbH1); _mm_storeu_pd(&hpf->db[4], vec_dbH2);
-       _mm_storeu_pd(&hpf->db[6], vec_dbH3); _mm_storeu_pd(&hpf->db[8], vec_dbH4);
-       _mm_storeu_si128((__m128i *)info->index2, index2);
-       _mm_storeu_pd(info->fb_ap1, vec_fbap1); _mm_storeu_pd(info->fb_ap2, vec_fbap2);
-}
-
-static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info) // SSE intrinsics path: same structure as the chST_ap8 variant but without the stereo-spread term (CH_MIX_STEREO); processes buf in place
-{ // buf: in/out samples, two consumed and two written per iteration (buf[k], buf[k+1]); count: upper bound on the element index (presumably even); info: reverb state, read at entry and written back at exit
-       int32 i, k = 0;
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       DATA_T *bufrd = info->buf2[REV_EX_RD];
-       __m128d vec_dcH0 = MM_LOAD1_PD(&hpf->dc[0]), vec_dcH1 = MM_LOAD1_PD(&hpf->dc[1]), vec_dcH2 = MM_LOAD1_PD(&hpf->dc[2]), 
-               vec_dcH3 = MM_LOAD1_PD(&hpf->dc[3]), vec_dcH4 = MM_LOAD1_PD(&hpf->dc[4]),
-               vec_dcL0 = MM_LOAD1_PD(&er_fc->dc[0]), vec_dcL1 = MM_LOAD1_PD(&er_fc->dc[1]);
-       __m128d vec_dbH0 = _mm_loadu_pd(&hpf->db[0]), vec_dbH1 = _mm_loadu_pd(&hpf->db[2]), vec_dbH2 = _mm_loadu_pd(&hpf->db[4]), 
-               vec_dbH3 = _mm_loadu_pd(&hpf->db[6]), vec_dbH4 = _mm_loadu_pd(&hpf->db[8]), vec_dbL = _mm_loadu_pd(er_fc->db);
-       __m128d vec_tmp1, vec_input_er, vec_input_rv, vec_mixer, vec_mixap, vec_mixrv, vec_tmp_rv, vec_db, // NOTE(review): vec_mixap is declared but never used in this function
-               vec_leveler = MM_LOAD1_PD(&info->leveler), vec_levelrv = MM_LOAD1_PD(&info->levelrv),
-               vec_dry = MM_LOAD1_PD(&info->flt_dry), vec_wet = MM_LOAD1_PD(&info->flt_wet),
-               vec_feedback = MM_LOAD1_PD(&info->feedback), vec_hist = _mm_loadu_pd(info->hist),
-               vec_mix_level = _mm_set1_pd(DIV_MIX_LEVEL); // NOTE(review): vec_mix_level is never used; the loop multiplies by the scalar DIV_MIX_LEVEL instead
-       __m128i index2 = _mm_loadu_si128((__m128i *)info->index2), size2 = _mm_loadu_si128((__m128i *)info->size2);
-       __m128i add_idx2 = _mm_set_epi32(1, 1, 2, 1); // per-iteration index steps, lanes 0..3 = 1, 2, 1, 1
-       __m128d vec_ap_fb = _mm_set1_pd(REV_EX_AP_FB), vec_levelap = MM_LOAD1_PD(&info->levelap), 
-               vec_fbap1 = _mm_loadu_pd(info->fb_ap1), vec_fbap2 = _mm_loadu_pd(info->fb_ap2);
-       __m128d vmsize = _mm_set1_pd(info->size2[REV_EX_UNIT]), vmi;
-       __m128d vasize = _mm_set1_pd(info->size2[REV_EX_AP1]), vai;
-       __m128d vtmp[2]; // used only by the "ap" input stage; both inner loops declare their own vtmp[4] that shadows this one
-       int32 msize = info->size2[REV_EX_UNIT], mindex;
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k += 2)
-       {               
-               ALIGN int32 tmpi2[4]; // scratch copy of index2 for scalar indexing
-               vec_input_er = _mm_add_pd(_mm_set1_pd((buf[k] + buf[k + 1]) * DIV_MIX_LEVEL), _mm_mul_pd(vec_hist, vec_feedback)); // scaled channel sum in both lanes, plus previous output (vec_hist) times feedback
-               // index2 (rv delay, ap
-               index2 = _mm_add_epi32(index2, add_idx2);
-               index2 = _mm_and_si128(index2, _mm_cmplt_epi32(index2, size2)); // wrap: any lane with index >= size is reset to 0
-               _mm_store_si128((__m128i *)&tmpi2, index2);
-               // rv delay out
-               vec_input_rv = _mm_load_pd(&bufrd[tmpi2[REV_EX_RD]]); // aligned load: assumes this element of bufrd is 16-byte aligned - TODO confirm
-               // unit
-               vec_mixer = _mm_setzero_pd(); vec_mixrv = _mm_setzero_pd();
-               vmi = _mm_cvtepi32_pd(_mm_shuffle_epi32(index2, 0x0)); // lane 0 of index2 broadcast, as double
-               mindex = _mm_cvtsi128_si32(index2); // lane 0 of index2 as int (presumably the REV_EX_UNIT slot - confirm)
-               for (i = 0; i < info->unit_num; i++) {
-               __m128d vc[2], vr[2], vd[2], vfp[2], vtmp[4], vv1[2], vv2[2];
-               __m128i vindex[2];
-               // lfo
-               vc[0] = _mm_add_pd(_mm_loadu_pd(&info->mcount[i][REV_EX_ER_L1]), _mm_loadu_pd(&info->mrate[i][REV_EX_ER_L1])); // mcount+mrate
-               vc[1] = _mm_add_pd(_mm_loadu_pd(&info->mcount[i][REV_EX_RV_L1]), _mm_loadu_pd(&info->mrate[i][REV_EX_RV_L1])); // mcount+mrate
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1
-               vc[0] = _mm_sub_pd(vc[0], _mm_floor_pd(vc[0])); // mcount-=floor(mcount)
-               vc[1] = _mm_sub_pd(vc[1], _mm_floor_pd(vc[1])); // mcount-=floor(mcount)
-#else
-               vc[0] = _mm_sub_pd(vc[0], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[0]))); // mcount-=(int)(mcount) (equals floor for non-negative values only)
-               vc[1] = _mm_sub_pd(vc[1], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[1]))); // mcount-=(int)(mcount) (equals floor for non-negative values only)
-#endif
-               _mm_storeu_pd(&info->mcount[i][REV_EX_ER_L1], vc[0]);
-               _mm_storeu_pd(&info->mcount[i][REV_EX_RV_L1], vc[1]);
-               vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->mphase[i][REV_EX_ER_L1])); // mcount+mphase
-               vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->mphase[i][REV_EX_RV_L1])); // mcount+mphase
-               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(mc)
-               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(mc)     
-               vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_ER_L1]), vd[0]); // mdepth* sine
-               vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->mdepth[i][REV_EX_RV_L1]), vd[1]); // mdepth* sine
-               vfp[0] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_ER_L1])), vd[0]); // mindex-mdelay-mdepth
-               vfp[1] = _mm_sub_pd(_mm_sub_pd(vmi, _mm_loadu_pd(&info->mdelay[i][REV_EX_RV_L1])), vd[1]); // mindex-mdelay-mdepth              
-               vfp[0] = _mm_add_pd(vfp[0], _mm_and_pd(vmsize, _mm_cmplt_pd(vfp[0], _mm_setzero_pd())));        // fp<0 ? fp+msize      
-               vfp[1] = _mm_add_pd(vfp[1], _mm_and_pd(vmsize, _mm_cmplt_pd(vfp[1], _mm_setzero_pd())));        // fp<0 ? fp+msize      
-               vindex[0] = _mm_cvttpd_epi32(vfp[0]); // (int)floor(fp)
-               vindex[1] = _mm_cvttpd_epi32(vfp[1]); // (int)floor(fp)
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 floor
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_floor_pd(vfp[0])); // fp-floor(fp)
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_floor_pd(vfp[1])); // fp-floor(fp)
-#else
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
-#endif
-               vtmp[0] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2: samples at [index] and [index+1]
-               vtmp[1] = _mm_loadu_pd(&info->buf[i][REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->buf[i][REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
-               vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0); // gather the two [index] samples (v1)
-               vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
-               vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3); // gather the two [index+1] samples (v2)
-               vv2[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x3);
-               vv1[0] = MM_FMA_PD(_mm_sub_pd(vv2[0], vv1[0]), vfp[0], vv1[0]); // presumably (v2-v1)*frac + v1, i.e. linear interpolation - confirm MM_FMA_PD operand order
-               vv1[1] = MM_FMA_PD(_mm_sub_pd(vv2[1], vv1[1]), vfp[1], vv1[1]);
-               // er out
-               vec_mixer = _mm_add_pd(vec_mixer, vv1[0]);
-               _mm_store_sd(&(info->buf[i][0][mindex]), vec_input_er); // er in: low lane goes to buffer 0
-               _mm_store_sd(&(info->buf[i][1][mindex]), _mm_shuffle_pd(vec_input_er, vec_input_er, 0x1)); // er in: high lane goes to buffer 1
-               // rv save
-               vec_tmp_rv = _mm_set_pd(*info->rv_in[i][1], *info->rv_in[i][0]);
-               // rv out
-               vec_tmp1 = vv1[1];
-               lpf = &rv_fc[i];
-               vec_db = _mm_loadu_pd(lpf->db);
-               vec_db = MM_FMA2_PD(MM_LOAD1_PD(&lpf->dc[0]), vec_tmp1, MM_LOAD1_PD(&lpf->dc[1]), vec_db);
-               _mm_storeu_pd(lpf->db, vec_db);
-               vec_tmp1 = MM_FMA2_PD(vec_tmp1, vec_dry, vec_db, vec_wet);
-               vec_mixrv = _mm_add_pd(vec_mixrv, vec_tmp1);
-               _mm_storeu_pd(info->rv_out[i], vec_tmp1);
-               // rv in
-               vec_tmp1 = _mm_add_pd(vec_input_rv, _mm_mul_pd(vec_tmp_rv, MM_LOAD1_PD(&info->rv_feedback[i])));
-               _mm_store_sd(&(info->buf[i][2][mindex]), vec_tmp1);
-               _mm_store_sd(&(info->buf[i][3][mindex]), _mm_shuffle_pd(vec_tmp1, vec_tmp1, 0x1));
-               if(mindex == 0){ // mirror element [0] into slot [msize] so the two-element loads above can read [index+1] at the wrap point
-                       info->buf[i][0][msize] = info->buf[i][0][0];
-                       info->buf[i][1][msize] = info->buf[i][1][0];
-                       info->buf[i][2][msize] = info->buf[i][2][0];
-                       info->buf[i][3][msize] = info->buf[i][3][0];
-               }
-               }
-               // er flt
-               vec_dbL = MM_FMA2_PD(vec_dcL0, vec_mixer, vec_dcL1, vec_dbL);
-               vec_mixer = MM_FMA2_PD(vec_mixer, vec_dry, vec_dbL, vec_wet);   
-               // rv delay in
-               _mm_store_pd(&bufrd[tmpi2[REV_EX_RD]], vec_mixer);      
-               // ap
-               vai = _mm_cvtepi32_pd(_mm_shuffle_epi32(index2, 0xAA)); // lane 2 of index2 broadcast, as double
-               vtmp[0] = MM_FMA2_PD(vec_mixer, vec_levelap, vec_fbap1, vec_ap_fb);
-               vtmp[1] = MM_FMA2_PD(vec_mixrv, vec_levelap, vec_fbap2, vec_ap_fb);
-               _mm_store_sd(&info->abuf[REV_EX_ER_L1][tmpi2[REV_EX_AP1]], vtmp[0]); 
-               _mm_store_sd(&info->abuf[REV_EX_ER_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[0], vtmp[0], 0x3)); // stores the high lane of vtmp[0]
-               _mm_store_sd(&info->abuf[REV_EX_RV_L1][tmpi2[REV_EX_AP1]], vtmp[1]); 
-               _mm_store_sd(&info->abuf[REV_EX_RV_R1][tmpi2[REV_EX_AP1]], _mm_shuffle_pd(vtmp[1], vtmp[0], 0x3)); // NOTE(review): second operand is vtmp[0], but only the low result lane (high lane of vtmp[1]) is stored
-               if(tmpi2[REV_EX_AP1] == 0){ // same [size] = [0] mirroring as for the unit buffers
-                       info->abuf[0][info->size2[REV_EX_AP1]] = info->abuf[0][0];
-                       info->abuf[1][info->size2[REV_EX_AP1]] = info->abuf[1][0];
-                       info->abuf[2][info->size2[REV_EX_AP1]] = info->abuf[2][0];
-                       info->abuf[3][info->size2[REV_EX_AP1]] = info->abuf[3][0];
-               }               
-               for (i = 0; i < REV_EX_AP_MAX; i++) {                           
-               __m128d vc[2], vr[2], vd[2], vfp[2], vtmp[4], vv1[2], vv2[2];
-               __m128i vindex[2];
-               // lfo
-               vc[0] = _mm_add_pd(_mm_loadu_pd(&info->acount[i][REV_EX_ER_L1]), _mm_loadu_pd(&info->arate[i][REV_EX_ER_L1])); // acount+arate
-               vc[1] = _mm_add_pd(_mm_loadu_pd(&info->acount[i][REV_EX_RV_L1]), _mm_loadu_pd(&info->arate[i][REV_EX_RV_L1])); // acount+arate
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1
-               vc[0] = _mm_sub_pd(vc[0], _mm_floor_pd(vc[0])); // count-=floor(count)
-               vc[1] = _mm_sub_pd(vc[1], _mm_floor_pd(vc[1])); // count-=floor(count)
-#else
-               vc[0] = _mm_sub_pd(vc[0], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[0]))); // count-=(int)(count) (equals floor for non-negative values only)
-               vc[1] = _mm_sub_pd(vc[1], _mm_cvtepi32_pd(_mm_cvttpd_epi32(vc[1]))); // count-=(int)(count) (equals floor for non-negative values only)
-#endif
-               _mm_storeu_pd(&info->acount[i][REV_EX_ER_L1], vc[0]);
-               _mm_storeu_pd(&info->acount[i][REV_EX_RV_L1], vc[1]);
-               vr[0] = _mm_add_pd(vc[0], _mm_loadu_pd(&info->aphase[i][REV_EX_ER_L1])); // count+phase
-               vr[1] = _mm_add_pd(vc[1], _mm_loadu_pd(&info->aphase[i][REV_EX_RV_L1])); // count+phase
-               vd[0] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[0],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[0],0))); // lookup2_sine_p(count)
-               vd[1] = _mm_set_pd(lookup2_sine_p(MM_EXTRACT_F64(vr[1],1)), lookup2_sine_p(MM_EXTRACT_F64(vr[1],0))); // lookup2_sine_p(count)
-               vd[0] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_ER_L1]), vd[0]); // depth* sine
-               vd[1] = _mm_mul_pd(_mm_loadu_pd(&info->adepth[i][REV_EX_RV_L1]), vd[1]); // depth* sine
-               vfp[0] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_ER_L1])), vd[0]); // index-delay-depth
-               vfp[1] = _mm_sub_pd(_mm_sub_pd(vai, _mm_loadu_pd(&info->adelay[i][REV_EX_RV_L1])), vd[1]); // index-delay-depth         
-               vfp[0] = _mm_add_pd(vfp[0], _mm_and_pd(vasize, _mm_cmplt_pd(vfp[0], _mm_setzero_pd())));        // fp<0 ? fp+size       
-               vfp[1] = _mm_add_pd(vfp[1], _mm_and_pd(vasize, _mm_cmplt_pd(vfp[1], _mm_setzero_pd())));        // fp<0 ? fp+size       
-               vindex[0] = _mm_cvttpd_epi32(vfp[0]); // (int)floor(fp)
-               vindex[1] = _mm_cvttpd_epi32(vfp[1]); // (int)floor(fp)
-#if (USE_X86_EXT_INTRIN >= 6) // sse4.1 floor
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_floor_pd(vfp[0])); // fp-floor(fp)
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_floor_pd(vfp[1])); // fp-floor(fp)
-#else
-               vfp[0] = _mm_sub_pd(vfp[0], _mm_cvtepi32_pd(vindex[0])); // fp-vindex
-               vfp[1] = _mm_sub_pd(vfp[1], _mm_cvtepi32_pd(vindex[1])); // fp-vindex
-#endif
-               vtmp[0] = _mm_loadu_pd(&info->abuf[REV_EX_ER_L1][MM_EXTRACT_I32(vindex[0],0)]); // v1v2: samples at [index] and [index+1]
-               vtmp[1] = _mm_loadu_pd(&info->abuf[REV_EX_ER_R1][MM_EXTRACT_I32(vindex[0],1)]); // v1v2
-               vtmp[2] = _mm_loadu_pd(&info->abuf[REV_EX_RV_L1][MM_EXTRACT_I32(vindex[1],0)]); // v1v2
-               vtmp[3] = _mm_loadu_pd(&info->abuf[REV_EX_RV_R1][MM_EXTRACT_I32(vindex[1],1)]); // v1v2
-               vv1[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x0);
-               vv1[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x0);
-               vv2[0] = _mm_shuffle_pd(vtmp[0], vtmp[1], 0x3);
-               vv2[1] = _mm_shuffle_pd(vtmp[2], vtmp[3], 0x3);
-               vec_mixer = _mm_add_pd(vec_mixer, MM_FMA_PD(_mm_sub_pd(vv2[0], vv1[0]), vfp[0], vv1[0])); // accumulate each interpolated tap onto the er mix
-               vec_mixrv = _mm_add_pd(vec_mixrv, MM_FMA_PD(_mm_sub_pd(vv2[1], vv1[1]), vfp[1], vv1[1])); // accumulate each interpolated tap onto the rv mix
-               }
-               vec_fbap1 = vec_mixer; vec_fbap2 = vec_mixrv; // carried into the "ap" input computation of the next iteration
-               // out
-               vec_hist = _mm_sub_pd(_mm_mul_pd(vec_mixrv, vec_levelrv), _mm_mul_pd(vec_mixer, vec_leveler));
-               vec_dbH0 = vec_hist;
-               vec_hist = vec_dbH2 = MM_FMA5_PD(vec_dcH0, vec_dbH0, vec_dcH1, vec_dbH1, vec_dcH2, vec_dbH2, vec_dcH3, vec_dbH3, vec_dcH4, vec_dbH4);
-               vec_dbH4 = vec_dbH3; vec_dbH3 = vec_dbH2; vec_dbH2 = vec_dbH1; vec_dbH1 = vec_dbH0; // shift the hpf history; statement order matters here
-               _mm_store_pd(&buf[k], vec_hist); // aligned store: assumes buf is 16-byte aligned - TODO confirm
-       }
-       _mm_storeu_pd(info->hist, vec_hist); _mm_storeu_pd(er_fc->db, vec_dbL); // write the register-held state back into info for the next call
-       _mm_storeu_pd(&hpf->db[0], vec_dbH0); _mm_storeu_pd(&hpf->db[2], vec_dbH1); _mm_storeu_pd(&hpf->db[4], vec_dbH2);
-       _mm_storeu_pd(&hpf->db[6], vec_dbH3); _mm_storeu_pd(&hpf->db[8], vec_dbH4);
-       _mm_storeu_si128((__m128i *)info->index2, index2);
-       _mm_storeu_pd(info->fb_ap1, vec_fbap1); _mm_storeu_pd(info->fb_ap2, vec_fbap2);
-}
-
-#else /* floating-point implementation */
-static void do_reverb_ex_mod_chST(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       int32 *mindex = &info->index2[REV_EX_UNIT], msize = info->size2[REV_EX_UNIT];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       FLOAT_T leveler = info->leveler, levelrv = info->levelrv, feedback = info->feedback,
-               *rv_feedback = info->rv_feedback, flt_dry = info->flt_dry, flt_wet = info->flt_wet,
-               *dcH = hpf->dc, *dcL = er_fc->dc, st_sprd = info->st_sprd;      
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  hist[2] = {info->hist[0], info->hist[1],}, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2], sprd;
-       FLOAT_T mindexf;
-       // CH_STEREO:
-       RDTSC_TEST1
-       for (k = 0; k < count; k++)
-       {               
-#if !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT)
-               input[0] = buf[k] * info->in_level; input[1] = buf[k + 1] * info->in_level;
-#else
-               input[0] = buf[k]; input[1] = buf[k + 1];
-#endif
-               sprd = (input[0] - input[1]) * st_sprd;
-               input[0] = input[1] = (input[0] + input[1]) * DIV_MIX_LEVEL;
-               input[0] += hist[0] * feedback; input[1] += hist[1] * feedback;
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               //unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               if((++*mindex) >= msize) {*mindex = 0;}
-               mindexf = *mindex;
-               for (i = 0; i < info->unit_num; i++) {  
-                       int32 index[4];
-                       DATA_T v1[4], v2[4];
-                       FLOAT_T fp1[4], fp2[4]; 
-                       // lfo
-                       info->mcount[i][REV_EX_ER_L1] += info->mrate[i][REV_EX_ER_L1];
-                       info->mcount[i][REV_EX_ER_L1] -= floor(info->mcount[i][REV_EX_ER_L1]);
-                       info->mcount[i][REV_EX_ER_R1] += info->mrate[i][REV_EX_ER_R1];
-                       info->mcount[i][REV_EX_ER_R1] -= floor(info->mcount[i][REV_EX_ER_R1]);
-                       info->mcount[i][REV_EX_RV_L1] += info->mrate[i][REV_EX_RV_L1];
-                       info->mcount[i][REV_EX_RV_L1] -= floor(info->mcount[i][REV_EX_RV_L1]);
-                       info->mcount[i][REV_EX_RV_R1] += info->mrate[i][REV_EX_RV_R1];
-                       info->mcount[i][REV_EX_RV_R1] -= floor(info->mcount[i][REV_EX_RV_R1]);
-                       fp1[0] = mindexf - info->mdelay[i][REV_EX_ER_L1] - info->mdepth[i][REV_EX_ER_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_L1] + info->mphase[i][REV_EX_ER_L1]);        
-                       fp1[1] = mindexf - info->mdelay[i][REV_EX_ER_R1] - info->mdepth[i][REV_EX_ER_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_R1] + info->mphase[i][REV_EX_ER_R1]);        
-                       fp1[2] = mindexf - info->mdelay[i][REV_EX_RV_L1] - info->mdepth[i][REV_EX_RV_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_L1] + info->mphase[i][REV_EX_RV_L1]);        
-                       fp1[3] = mindexf - info->mdelay[i][REV_EX_RV_R1] - info->mdepth[i][REV_EX_RV_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_R1] + info->mphase[i][REV_EX_RV_R1]);        
-                       if(fp1[0] < 0) {fp1[0] += msize;}
-                       if(fp1[1] < 0) {fp1[1] += msize;}               
-                       if(fp1[2] < 0) {fp1[2] += msize;}
-                       if(fp1[3] < 0) {fp1[3] += msize;}                       
-                       fp2[0] = floor(fp1[0]); index[0] = fp2[0]; 
-                       fp2[1] = floor(fp1[1]); index[1] = fp2[1];
-                       fp2[2] = floor(fp1[2]); index[2] = fp2[2]; 
-                       fp2[3] = floor(fp1[3]); index[3] = fp2[3]; 
-                       v1[0] = info->buf[i][REV_EX_ER_L1][index[0]]; v2[0] = info->buf[i][REV_EX_ER_L1][index[0] + 1];
-                       v1[1] = info->buf[i][REV_EX_ER_R1][index[1]]; v2[1] = info->buf[i][REV_EX_ER_R1][index[1] + 1];
-                       v1[2] = info->buf[i][REV_EX_RV_L1][index[2]]; v2[2] = info->buf[i][REV_EX_RV_L1][index[2] + 1];
-                       v1[3] = info->buf[i][REV_EX_RV_R1][index[3]]; v2[3] = info->buf[i][REV_EX_RV_R1][index[3] + 1];
-                       // er out
-                       dat_er[0] += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
-                       dat_er[1] += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
-                       // er in
-                       info->buf[i][0][*mindex] = input[0]; info->buf[i][1][*mindex] = input[1];
-                       input[0] += sprd; input[1] -= sprd; // spread
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = v1[2] + (v2[2] - v1[2]) * (fp1[2] - fp2[2]); // linear interpolation
-                       tmp1[1] = v1[3] + (v2[3] - v1[3]) * (fp1[3] - fp2[3]); // linear interpolation
-               //      sample_filter_stereo(&rv_fc1[i], &flt[0], &flt[1]);     
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = lpf->dc[0] * tmp1[0] + lpf->dc[1] * lpf->db[0];
-                       lpf->db[1] = lpf->dc[0] * tmp1[1] + lpf->dc[1] * lpf->db[1];
-                       dat_rv[0] += (info->rv_out[i][0] = tmp1[0] * flt_dry + lpf->db[0] * flt_wet);
-                       dat_rv[1] += (info->rv_out[i][1] = tmp1[1] * flt_dry + lpf->db[1] * flt_wet);
-                       // rv in
-                       info->buf[i][2][*mindex] = input_rv[0] + tmp_rv[0] * rv_feedback[i];
-                       info->buf[i][3][*mindex] = input_rv[1] + tmp_rv[1] * rv_feedback[i];    
-                       if(*mindex == 0){
-                               info->buf[i][0][msize] = info->buf[i][0][0];
-                               info->buf[i][1][msize] = info->buf[i][1][0];
-                               info->buf[i][2][msize] = info->buf[i][2][0];
-                               info->buf[i][3][msize] = info->buf[i][3][0];
-                       }
-               }
-       //      sample_filter_stereo(er_fc, &dat_er[0], &dat_er[1]);
-               dbL[0] = dcL[0] * dat_er[0] + dcL[1] * dbL[0];
-               dbL[1] = dcL[0] * dat_er[1] + dcL[1] * dbL[1];
-               dat_er[0] = dat_er[0] * flt_dry + dbL[0] * flt_wet;
-               dat_er[1] = dat_er[1] * flt_dry + dbL[1] * flt_wet;     
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // out
-               hist[0] = dat_rv[0] * levelrv - dat_er[0] * leveler;
-               hist[1] = dat_rv[1] * levelrv - dat_er[1] * leveler;
-       //      sample_filter_stereo(hpf, &hist[0], &hist[1]);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = dcH[0] * dbHL[0] + dcH[1] * dbHL[1] + dcH[2] * dbHL[2] + dcH[3] * dbHL[3] + dcH[4] * dbHL[4];
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = dcH[0] * dbHR[0] + dcH[1] * dbHR[1] + dcH[2] * dbHR[2] + dcH[3] * dbHR[3] + dcH[4] * dbHR[4];
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];
-               buf[k] = hist[0]; buf[++k] = hist[1];
-       }
-       info->hist[0] = hist[0], info->hist[1] = hist[1];
-       RDTSC_TEST2
-}
-
-static void do_reverb_ex_mod_chMS(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       int32 *mindex = &info->index2[REV_EX_UNIT], msize = info->size2[REV_EX_UNIT];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       FLOAT_T leveler = info->leveler, levelrv = info->levelrv, feedback = info->feedback,
-               *rv_feedback = info->rv_feedback, flt_dry = info->flt_dry, flt_wet = info->flt_wet,
-               *dcH = hpf->dc, *dcL = er_fc->dc;       
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  hist[2] = {info->hist[0], info->hist[1],}, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2];
-       FLOAT_T mindexf;
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k++)
-       {               
-#if !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT)
-               input[0] = input[1] = (buf[k] + buf[k + 1]) * DIV_MIX_LEVEL * info->in_level;
-#else
-               input[0] = input[1] = (buf[k] + buf[k + 1]) * DIV_MIX_LEVEL;
-#endif
-               input[0] += hist[0] * feedback; input[1] += hist[1] * feedback;
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               //unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               if((++*mindex) >= msize) {*mindex = 0;}
-               mindexf = *mindex;
-               for (i = 0; i < info->unit_num; i++) {
-                       int32 index[4];
-                       DATA_T v1[4], v2[4];
-                       FLOAT_T fp1[4], fp2[4]; 
-                       // lfo
-                       info->mcount[i][REV_EX_ER_L1] += info->mrate[i][REV_EX_ER_L1];
-                       info->mcount[i][REV_EX_ER_L1] -= floor(info->mcount[i][REV_EX_ER_L1]);
-                       info->mcount[i][REV_EX_ER_R1] += info->mrate[i][REV_EX_ER_R1];
-                       info->mcount[i][REV_EX_ER_R1] -= floor(info->mcount[i][REV_EX_ER_R1]);
-                       info->mcount[i][REV_EX_RV_L1] += info->mrate[i][REV_EX_RV_L1];
-                       info->mcount[i][REV_EX_RV_L1] -= floor(info->mcount[i][REV_EX_RV_L1]);
-                       info->mcount[i][REV_EX_RV_R1] += info->mrate[i][REV_EX_RV_R1];
-                       info->mcount[i][REV_EX_RV_R1] -= floor(info->mcount[i][REV_EX_RV_R1]);
-                       fp1[0] = mindexf - info->mdelay[i][REV_EX_ER_L1] - info->mdepth[i][REV_EX_ER_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_L1] + info->mphase[i][REV_EX_ER_L1]);        
-                       fp1[1] = mindexf - info->mdelay[i][REV_EX_ER_R1] - info->mdepth[i][REV_EX_ER_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_R1] + info->mphase[i][REV_EX_ER_R1]);        
-                       fp1[2] = mindexf - info->mdelay[i][REV_EX_RV_L1] - info->mdepth[i][REV_EX_RV_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_L1] + info->mphase[i][REV_EX_RV_L1]);        
-                       fp1[3] = mindexf - info->mdelay[i][REV_EX_RV_R1] - info->mdepth[i][REV_EX_RV_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_R1] + info->mphase[i][REV_EX_RV_R1]);        
-                       if(fp1[0] < 0) {fp1[0] += msize;}
-                       if(fp1[1] < 0) {fp1[1] += msize;}               
-                       if(fp1[2] < 0) {fp1[2] += msize;}
-                       if(fp1[3] < 0) {fp1[3] += msize;}                       
-                       fp2[0] = floor(fp1[0]);
-                       fp2[1] = floor(fp1[1]);
-                       fp2[2] = floor(fp1[2]);
-                       fp2[3] = floor(fp1[3]);
-                       index[0] = fp2[0]; 
-                       index[1] = fp2[1]; 
-                       index[2] = fp2[2]; 
-                       index[3] = fp2[3]; 
-                       v1[0] = info->buf[i][REV_EX_ER_L1][index[0]]; v2[0] = info->buf[i][REV_EX_ER_L1][index[0] + 1];
-                       v1[1] = info->buf[i][REV_EX_ER_R1][index[1]]; v2[1] = info->buf[i][REV_EX_ER_R1][index[1] + 1];
-                       v1[2] = info->buf[i][REV_EX_RV_L1][index[2]]; v2[2] = info->buf[i][REV_EX_RV_L1][index[2] + 1];
-                       v1[3] = info->buf[i][REV_EX_RV_R1][index[3]]; v2[3] = info->buf[i][REV_EX_RV_R1][index[3] + 1];
-                       // er out
-                       dat_er[0] += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
-                       dat_er[1] += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
-                       // er in
-                       info->buf[i][0][*mindex] = input[0]; info->buf[i][1][*mindex] = input[1];
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = v1[2] + (v2[2] - v1[2]) * (fp1[2] - fp2[2]); // linear interpolation
-                       tmp1[1] = v1[3] + (v2[3] - v1[3]) * (fp1[3] - fp2[3]); // linear interpolation
-               //      sample_filter_stereo(&rv_fc1[i], &flt[0], &flt[1]);     
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = lpf->dc[0] * tmp1[0] + lpf->dc[1] * lpf->db[0];
-                       lpf->db[1] = lpf->dc[0] * tmp1[1] + lpf->dc[1] * lpf->db[1];
-                       dat_rv[0] += (info->rv_out[i][0] = tmp1[0] * flt_dry + lpf->db[0] * flt_wet);
-                       dat_rv[1] += (info->rv_out[i][1] = tmp1[1] * flt_dry + lpf->db[1] * flt_wet);
-                       // rv in
-                       info->buf[i][2][*mindex] = input_rv[0] + tmp_rv[0] * rv_feedback[i];
-                       info->buf[i][3][*mindex] = input_rv[1] + tmp_rv[1] * rv_feedback[i];    
-                       if(*mindex == 0){
-                               info->buf[i][0][msize] = info->buf[i][0][0];
-                               info->buf[i][1][msize] = info->buf[i][1][0];
-                               info->buf[i][2][msize] = info->buf[i][2][0];
-                               info->buf[i][3][msize] = info->buf[i][3][0];
-                       }
-               }
-       //      sample_filter_stereo(er_fc, &dat_er[0], &dat_er[1]);
-               dbL[0] = dcL[0] * dat_er[0] + dcL[1] * dbL[0];
-               dbL[1] = dcL[0] * dat_er[1] + dcL[1] * dbL[1];
-               dat_er[0] = dat_er[0] * flt_dry + dbL[0] * flt_wet;
-               dat_er[1] = dat_er[1] * flt_dry + dbL[1] * flt_wet;             
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // out
-               hist[0] = dat_rv[0] * levelrv - dat_er[0] * leveler;
-               hist[1] = dat_rv[1] * levelrv - dat_er[1] * leveler;
-       //      sample_filter_stereo(hpf, &hist[0], &hist[1]);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = dcH[0] * dbHL[0] + dcH[1] * dbHL[1] + dcH[2] * dbHL[2] + dcH[3] * dbHL[3] + dcH[4] * dbHL[4];
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = dcH[0] * dbHR[0] + dcH[1] * dbHR[1] + dcH[2] * dbHR[2] + dcH[3] * dbHR[3] + dcH[4] * dbHR[4];
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];
-               buf[k] = hist[0]; buf[++k] = hist[1];
-       }
-       info->hist[0] = hist[0], info->hist[1] = hist[1];
-}
-
-static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       int32 *mindex = &info->index2[REV_EX_UNIT], msize = info->size2[REV_EX_UNIT];
-       int32 *aindex = &info->index2[REV_EX_AP1], asize = info->size2[REV_EX_AP1];
+       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
+       int32 *mindex = &info->index2[REV_EX_UNIT], msize = info->size2[REV_EX_UNIT];
+       int32 *aindex = &info->index2[REV_EX_AP1], asize = info->size2[REV_EX_AP1];
        FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
        FLOAT_T leveler = info->leveler, levelrv = info->levelrv, feedback = info->feedback,
                *rv_feedback = info->rv_feedback, flt_dry = info->flt_dry, flt_wet = info->flt_wet,
@@ -7854,7 +6514,7 @@ static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                *fb_ap1 = info->fb_ap1, *fb_ap2 = info->fb_ap2;
        FLOAT_T levelap = info->levelap;
        FLOAT_T mindexf, aindexf;
-       // CH_STEREO:
+       // CH_STEREO: CH_MIX_STEREO:
        RDTSC_TEST1
        for (k = 0; k < count; k++)
        {               
@@ -7946,6 +6606,7 @@ static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                // rv delay in
                bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
                // ap
+               if(ext_reverb_ex_ap_num){
                if ((++(*aindex)) >= asize) {*aindex -= asize;}
                aindexf = *aindex;
                info->abuf[REV_EX_ER_L1][*aindex] = dat_er[0] * levelap + fb_ap1[0] * REV_EX_AP_FB; 
@@ -7958,7 +6619,7 @@ static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                        info->abuf[2][asize] = info->abuf[2][0];
                        info->abuf[3][asize] = info->abuf[3][0];
                }               
-               for (i = 0; i < REV_EX_AP_MAX; i++) {                   
+               for (i = 0; i < ext_reverb_ex_ap_num; i++) {                    
                        int32 index[4];
                        DATA_T v1[4], v2[4];
                        FLOAT_T fp1[4], fp2[4]; 
@@ -8002,190 +6663,10 @@ static void do_reverb_ex_mod_chST_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                }
                fb_ap1[0] = dat_er[0]; fb_ap1[1] = dat_er[1];
                fb_ap2[0] = dat_rv[0]; fb_ap2[1] = dat_rv[1];
-               // out
-               hist[0] = dat_rv[0] * levelrv - dat_er[0] * leveler;
-               hist[1] = dat_rv[1] * levelrv - dat_er[1] * leveler;
-       //      sample_filter_stereo(hpf, &hist[0], &hist[1]);
-               dbHL[0] = hist[0];      
-               hist[0] = dbHL[2] = dcH[0] * dbHL[0] + dcH[1] * dbHL[1] + dcH[2] * dbHL[2] + dcH[3] * dbHL[3] + dcH[4] * dbHL[4];
-               dbHL[4] = dbHL[3];
-               dbHL[3] = dbHL[2];
-               dbHL[2] = dbHL[1];
-               dbHL[1] = dbHL[0];
-               dbHR[0] = hist[1];      
-               hist[1] = dbHR[2] = dcH[0] * dbHR[0] + dcH[1] * dbHR[1] + dcH[2] * dbHR[2] + dcH[3] * dbHR[3] + dcH[4] * dbHR[4];
-               dbHR[4] = dbHR[3];
-               dbHR[3] = dbHR[2];
-               dbHR[2] = dbHR[1];
-               dbHR[1] = dbHR[0];
-               buf[k] = hist[0]; buf[++k] = hist[1];
-       }
-       info->hist[0] = hist[0], info->hist[1] = hist[1];
-       RDTSC_TEST2
-}
-
-static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *info)
-{
-       int32 i, k = 0, *indexrd = &info->index2[REV_EX_RD], sizerd = info->size2[REV_EX_RD];
-       int32 *mindex = &info->index2[REV_EX_UNIT], msize = info->size2[REV_EX_UNIT];
-       int32 *aindex = &info->index2[REV_EX_AP1], asize = info->size2[REV_EX_AP1];
-       FilterCoefficients *er_fc = &info->er_fc, *rv_fc = info->rv_fc1, *hpf = &info->hpf, *lpf;
-       FLOAT_T leveler = info->leveler, levelrv = info->levelrv, feedback = info->feedback,
-               *rv_feedback = info->rv_feedback, flt_dry = info->flt_dry, flt_wet = info->flt_wet,
-               *dcH = hpf->dc, *dcL = er_fc->dc;       
-       FILTER_T *dbHL = &hpf->db[0], *dbHR = &hpf->db[5], *dbL = er_fc->db;
-       DATA_T  hist[2] = {info->hist[0], info->hist[1],}, *bufrd = info->buf2[REV_EX_RD],
-               input[2], input_rv[2], dat_er[2], dat_rv[2], tmp1[2], tmp_rv[2];
-       DATA_T *bufa1 = info->buf2[REV_EX_AP1], *bufa2 = info->buf2[REV_EX_AP2], 
-               *fb_ap1 = info->fb_ap1, *fb_ap2 = info->fb_ap2;
-       FLOAT_T levelap = info->levelap;
-       FLOAT_T mindexf, aindexf;
-       // CH_MIX_STEREO:
-       for (k = 0; k < count; k++)
-       {               
-#if !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT)
-               input[0] = input[1] = (buf[k] + buf[k + 1]) * DIV_MIX_LEVEL * info->in_level;
-#else
-               input[0] = input[1] = (buf[k] + buf[k + 1]) * DIV_MIX_LEVEL;
-#endif
-               input[0] += hist[0] * feedback; input[1] += hist[1] * feedback;
-               // rv delay out
-               if ((*indexrd += 2) >= sizerd) {*indexrd = 0;}
-               input_rv[0] = bufrd[*indexrd]; input_rv[1] = bufrd[*indexrd + 1];
-               //unit
-               dat_er[0] = 0; dat_er[1] = 0; dat_rv[0] = 0, dat_rv[1] = 0;
-               if((++*mindex) >= msize) {*mindex = 0;}
-               mindexf = *mindex;
-               for (i = 0; i < info->unit_num; i++) {
-                       int32 index[4];
-                       DATA_T v1[4], v2[4];
-                       FLOAT_T fp1[4], fp2[4]; 
-                       // lfo
-                       info->mcount[i][REV_EX_ER_L1] += info->mrate[i][REV_EX_ER_L1];
-                       info->mcount[i][REV_EX_ER_L1] -= floor(info->mcount[i][REV_EX_ER_L1]);
-                       info->mcount[i][REV_EX_ER_R1] += info->mrate[i][REV_EX_ER_R1];
-                       info->mcount[i][REV_EX_ER_R1] -= floor(info->mcount[i][REV_EX_ER_R1]);
-                       info->mcount[i][REV_EX_RV_L1] += info->mrate[i][REV_EX_RV_L1];
-                       info->mcount[i][REV_EX_RV_L1] -= floor(info->mcount[i][REV_EX_RV_L1]);
-                       info->mcount[i][REV_EX_RV_R1] += info->mrate[i][REV_EX_RV_R1];
-                       info->mcount[i][REV_EX_RV_R1] -= floor(info->mcount[i][REV_EX_RV_R1]);
-                       fp1[0] = mindexf - info->mdelay[i][REV_EX_ER_L1] - info->mdepth[i][REV_EX_ER_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_L1] + info->mphase[i][REV_EX_ER_L1]);        
-                       fp1[1] = mindexf - info->mdelay[i][REV_EX_ER_R1] - info->mdepth[i][REV_EX_ER_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_ER_R1] + info->mphase[i][REV_EX_ER_R1]);        
-                       fp1[2] = mindexf - info->mdelay[i][REV_EX_RV_L1] - info->mdepth[i][REV_EX_RV_L1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_L1] + info->mphase[i][REV_EX_RV_L1]);        
-                       fp1[3] = mindexf - info->mdelay[i][REV_EX_RV_R1] - info->mdepth[i][REV_EX_RV_R1]
-                               * lookup2_sine_p(info->mcount[i][REV_EX_RV_R1] + info->mphase[i][REV_EX_RV_R1]);        
-                       if(fp1[0] < 0) {fp1[0] += msize;}
-                       if(fp1[1] < 0) {fp1[1] += msize;}               
-                       if(fp1[2] < 0) {fp1[2] += msize;}
-                       if(fp1[3] < 0) {fp1[3] += msize;}                       
-                       fp2[0] = floor(fp1[0]);
-                       fp2[1] = floor(fp1[1]);
-                       fp2[2] = floor(fp1[2]);
-                       fp2[3] = floor(fp1[3]);
-                       index[0] = fp2[0]; 
-                       index[1] = fp2[1]; 
-                       index[2] = fp2[2]; 
-                       index[3] = fp2[3]; 
-                       v1[0] = info->buf[i][REV_EX_ER_L1][index[0]]; v2[0] = info->buf[i][REV_EX_ER_L1][index[0] + 1];
-                       v1[1] = info->buf[i][REV_EX_ER_R1][index[1]]; v2[1] = info->buf[i][REV_EX_ER_R1][index[1] + 1];
-                       v1[2] = info->buf[i][REV_EX_RV_L1][index[2]]; v2[2] = info->buf[i][REV_EX_RV_L1][index[2] + 1];
-                       v1[3] = info->buf[i][REV_EX_RV_R1][index[3]]; v2[3] = info->buf[i][REV_EX_RV_R1][index[3] + 1];
-                       // er out
-                       dat_er[0] += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
-                       dat_er[1] += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
-                       // er in
-                       info->buf[i][0][*mindex] = input[0]; info->buf[i][1][*mindex] = input[1];
-                       // rv save
-                       tmp_rv[0] = *info->rv_in[i][0]; tmp_rv[1] = *info->rv_in[i][1];
-                       // rv out       
-                       tmp1[0] = v1[2] + (v2[2] - v1[2]) * (fp1[2] - fp2[2]); // linear interpolation
-                       tmp1[1] = v1[3] + (v2[3] - v1[3]) * (fp1[3] - fp2[3]); // linear interpolation
-               //      sample_filter_stereo(&rv_fc1[i], &flt[0], &flt[1]);     
-                       lpf = &rv_fc[i];
-                       lpf->db[0] = lpf->dc[0] * tmp1[0] + lpf->dc[1] * lpf->db[0];
-                       lpf->db[1] = lpf->dc[0] * tmp1[1] + lpf->dc[1] * lpf->db[1];
-                       dat_rv[0] += (info->rv_out[i][0] = tmp1[0] * flt_dry + lpf->db[0] * flt_wet);
-                       dat_rv[1] += (info->rv_out[i][1] = tmp1[1] * flt_dry + lpf->db[1] * flt_wet);
-                       // rv in
-                       info->buf[i][2][*mindex] = input_rv[0] + tmp_rv[0] * rv_feedback[i];
-                       info->buf[i][3][*mindex] = input_rv[1] + tmp_rv[1] * rv_feedback[i];    
-                       if(*mindex == 0){
-                               info->buf[i][0][msize] = info->buf[i][0][0];
-                               info->buf[i][1][msize] = info->buf[i][1][0];
-                               info->buf[i][2][msize] = info->buf[i][2][0];
-                               info->buf[i][3][msize] = info->buf[i][3][0];
-                       }
-               }
-       //      sample_filter_stereo(er_fc, &dat_er[0], &dat_er[1]);
-               dbL[0] = dcL[0] * dat_er[0] + dcL[1] * dbL[0];
-               dbL[1] = dcL[0] * dat_er[1] + dcL[1] * dbL[1];
-               dat_er[0] = dat_er[0] * flt_dry + dbL[0] * flt_wet;
-               dat_er[1] = dat_er[1] * flt_dry + dbL[1] * flt_wet;             
-               // rv delay in
-               bufrd[*indexrd] = dat_er[0]; bufrd[*indexrd + 1] = dat_er[1];
-               // ap
-               if ((++(*aindex)) >= asize) {*aindex -= asize;}
-               aindexf = *aindex;
-               info->abuf[REV_EX_ER_L1][*aindex] = dat_er[0] * levelap + fb_ap1[0] * REV_EX_AP_FB; 
-               info->abuf[REV_EX_ER_R1][*aindex] = dat_er[1] * levelap + fb_ap1[1] * REV_EX_AP_FB; 
-               info->abuf[REV_EX_RV_L1][*aindex] = dat_rv[0] * levelap + fb_ap2[0] * REV_EX_AP_FB; 
-               info->abuf[REV_EX_RV_R1][*aindex] = dat_rv[1] * levelap + fb_ap2[1] * REV_EX_AP_FB; 
-               if(*aindex == 0){
-                       info->abuf[0][asize] = info->abuf[0][0];
-                       info->abuf[1][asize] = info->abuf[1][0];
-                       info->abuf[2][asize] = info->abuf[2][0];
-                       info->abuf[3][asize] = info->abuf[3][0];
-               }               
-               for (i = 0; i < REV_EX_AP_MAX; i++) {                   
-                       int32 index[4];
-                       DATA_T v1[4], v2[4];
-                       FLOAT_T fp1[4], fp2[4]; 
-                       // lfo
-                       info->acount[i][REV_EX_ER_L1] += info->arate[i][REV_EX_ER_L1];
-                       info->acount[i][REV_EX_ER_L1] -= floor(info->acount[i][REV_EX_ER_L1]);
-                       info->acount[i][REV_EX_ER_R1] += info->arate[i][REV_EX_ER_R1];
-                       info->acount[i][REV_EX_ER_R1] -= floor(info->acount[i][REV_EX_ER_R1]);
-                       info->acount[i][REV_EX_RV_L1] += info->arate[i][REV_EX_RV_L1];
-                       info->acount[i][REV_EX_RV_L1] -= floor(info->acount[i][REV_EX_RV_L1]);
-                       info->acount[i][REV_EX_RV_R1] += info->arate[i][REV_EX_RV_R1];
-                       info->acount[i][REV_EX_RV_R1] -= floor(info->acount[i][REV_EX_RV_R1]);
-                       fp1[0] = aindexf - info->adelay[i][REV_EX_ER_L1] - info->adepth[i][REV_EX_ER_L1]
-                               * lookup2_sine_p(info->acount[i][REV_EX_ER_L1] + info->aphase[i][REV_EX_ER_L1]);        
-                       fp1[1] = aindexf - info->adelay[i][REV_EX_ER_R1] - info->adepth[i][REV_EX_ER_R1]
-                               * lookup2_sine_p(info->acount[i][REV_EX_ER_R1] + info->aphase[i][REV_EX_ER_R1]);        
-                       fp1[2] = aindexf - info->adelay[i][REV_EX_RV_L1] - info->adepth[i][REV_EX_RV_L1]
-                               * lookup2_sine_p(info->acount[i][REV_EX_RV_L1] + info->aphase[i][REV_EX_RV_L1]);        
-                       fp1[3] = aindexf - info->adelay[i][REV_EX_RV_R1] - info->adepth[i][REV_EX_RV_R1]
-                               * lookup2_sine_p(info->acount[i][REV_EX_RV_R1] + info->aphase[i][REV_EX_RV_R1]);        
-                       if(fp1[0] < 0) {fp1[0] += asize;}
-                       if(fp1[1] < 0) {fp1[1] += asize;}               
-                       if(fp1[2] < 0) {fp1[2] += asize;}
-                       if(fp1[3] < 0) {fp1[3] += asize;}                       
-                       fp2[0] = floor(fp1[0]);
-                       fp2[1] = floor(fp1[1]);
-                       fp2[2] = floor(fp1[2]);
-                       fp2[3] = floor(fp1[3]);
-                       index[0] = fp2[0]; 
-                       index[1] = fp2[1]; 
-                       index[2] = fp2[2]; 
-                       index[3] = fp2[3]; 
-                       v1[0] = info->abuf[REV_EX_ER_L1][index[0]]; v2[0] = info->abuf[REV_EX_ER_L1][index[0] + 1];
-                       v1[1] = info->abuf[REV_EX_ER_R1][index[1]]; v2[1] = info->abuf[REV_EX_ER_R1][index[1] + 1];
-                       v1[2] = info->abuf[REV_EX_RV_L1][index[2]]; v2[2] = info->abuf[REV_EX_RV_L1][index[2] + 1];
-                       v1[3] = info->abuf[REV_EX_RV_R1][index[3]]; v2[3] = info->abuf[REV_EX_RV_R1][index[3] + 1];
-                       dat_er[0] += v1[0] + (v2[0] - v1[0]) * (fp1[0] - fp2[0]); // linear interpolation
-                       dat_er[1] += v1[1] + (v2[1] - v1[1]) * (fp1[1] - fp2[1]); // linear interpolation
-                       dat_rv[0] += v1[2] + (v2[2] - v1[2]) * (fp1[2] - fp2[2]); // linear interpolation
-                       dat_rv[1] += v1[3] + (v2[3] - v1[3]) * (fp1[3] - fp2[3]); // linear interpolation
                }
-               fb_ap1[0] = dat_er[0]; fb_ap1[1] = dat_er[1];
-               fb_ap2[0] = dat_rv[0]; fb_ap2[1] = dat_rv[1];
                // out
-               hist[0] = dat_rv[0] * levelrv - dat_er[0] * leveler;
-               hist[1] = dat_rv[1] * levelrv - dat_er[1] * leveler;
+               hist[0] = dat_rv[0] * levelrv + dat_er[0] * leveler;
+               hist[1] = dat_rv[1] * levelrv + dat_er[1] * leveler;
        //      sample_filter_stereo(hpf, &hist[0], &hist[1]);
                dbHL[0] = hist[0];      
                hist[0] = dbHL[2] = dcH[0] * dbHL[0] + dcH[1] * dbHL[1] + dcH[2] * dbHL[2] + dcH[3] * dbHL[3] + dcH[4] * dbHL[4];
@@ -8202,6 +6683,7 @@ static void do_reverb_ex_mod_chMS_ap8(DATA_T *buf, int32 count, InfoReverbEX *in
                buf[k] = hist[0]; buf[++k] = hist[1];
        }
        info->hist[0] = hist[0], info->hist[1] = hist[1];
+       RDTSC_TEST2
 }
 #endif
 
@@ -8226,19 +6708,895 @@ static void do_reverb_ex(DATA_T *buf, int32 count, InfoReverbEX *info)
 #ifdef REV_EX2
 
 
+/* from instrum.c */
+/*
+ * Helpers for reading fixed-width fields from a timidity_file.
+ * Every macro expects the enclosing function to provide:
+ *   - `tf`:   the file being read,
+ *   - a scratch variable of the matching width (`tmpchar`, `tmpshort`
+ *             or `tmplong`),
+ *   - a `fail:` label, which is jumped to when the read comes up short.
+ * The _LE/_BE variants pass the raw value through LE_xxx()/BE_xxx()
+ * before storing it in `thing`.
+ * NOTE(review): each macro expands to two statements (an `if` followed by
+ * an assignment), so it must not be the sole body of an unbraced
+ * if/else/loop.
+ */
+#define READ_CHAR(thing) \
+      if (1 != tf_read(&tmpchar, 1, 1, tf)) goto fail; \
+      thing = tmpchar;
+
+#define READ_SHORT_LE(thing) \
+      if (1 != tf_read(&tmpshort, 2, 1, tf)) goto fail; \
+      thing = LE_SHORT(tmpshort);
+#define READ_LONG_LE(thing) \
+      if (1 != tf_read(&tmplong, 4, 1, tf)) goto fail; \
+      thing = LE_LONG(tmplong);
+#define READ_SHORT_BE(thing) \
+      if (1 != tf_read(&tmpshort, 2, 1, tf)) goto fail; \
+      thing = BE_SHORT(tmpshort);
+#define READ_LONG_BE(thing) \
+      if (1 != tf_read(&tmplong, 4, 1, tf)) goto fail; \
+      thing = BE_LONG(tmplong);
+
+/* Values compared against WAVFormatChunk.wFormatTag. */
+#define  WAVE_FORMAT_UNKNOWN      0x0000
+#define  WAVE_FORMAT_PCM          0x0001
+#define  WAVE_FORMAT_ADPCM        0x0002
+#define  WAVE_FORMAT_IEEE_FLOAT   0x0003
+#define  WAVE_FORMAT_ALAW         0x0006
+#define  WAVE_FORMAT_MULAW        0x0007
+#define  WAVE_FORMAT_EXTENSIBLE   0xFFFE
+
+/*
+ * First 16 bytes of a WAVE "fmt " chunk, in file order.
+ * Filled field by field by read_WAVFormatChunk() (little-endian reads).
+ */
+typedef struct {
+       uint16 wFormatTag;       /* one of the WAVE_FORMAT_* values */
+       uint16 wChannels;        /* number of interleaved channels */
+       uint32 dwSamplesPerSec;  /* sample rate */
+       uint32 dwAvgBytesPerSec;
+       uint16 wBlockAlign;      /* bytes per frame; used to turn a data chunk size into a frame count */
+       uint16 wBitsPerSample;
+} WAVFormatChunk;
+
+/*
+ * Read the fixed 16-byte part of a "fmt " chunk into *fmt and skip whatever
+ * extra bytes the chunk declares beyond it.
+ *
+ * tf    : file positioned at the start of the chunk payload
+ * fmt   : destination for the parsed fields
+ * csize : payload size of the chunk in bytes (the caller checks it is >= 0x10)
+ *
+ * Returns 1 on success; on a short read or failed seek a warning is logged
+ * and 0 is returned.
+ */
+static int read_WAVFormatChunk(struct timidity_file *tf, WAVFormatChunk *fmt, int csize)
+{
+       /* scratch variables required by the READ_*_LE macros */
+       int32 tmplong;
+       int16 tmpshort;
+
+       READ_SHORT_LE(fmt->wFormatTag);
+       READ_SHORT_LE(fmt->wChannels);
+       READ_LONG_LE(fmt->dwSamplesPerSec);
+       READ_LONG_LE(fmt->dwAvgBytesPerSec);
+       READ_SHORT_LE(fmt->wBlockAlign);
+       READ_SHORT_LE(fmt->wBitsPerSample);
+       /* 0x10 bytes were consumed above; step over any format extension */
+       if (tf_seek(tf, csize - 0x10, SEEK_CUR) == -1)
+               goto fail;
+       return 1;
+       fail:
+               ctl->cmsg(CMSG_WARNING, VERB_VERBOSE, "Unable to read format chunk");
+       return 0;
+}
+
+/* Bit flags for the `flags` argument of read_sample_data(). */
+#define SAMPLE_BIG_ENDIAN    (1L << 0)
+#define SAMPLE_8BIT_UNSIGNED (1L << 1)
+#define SAMPLE_IEEE_FLOAT    (1L << 2)
+#define WAVE_BUF_SIZE (1L << 11)       /* should be power of 2 */
+/*
+ * Read (b) * (s) bytes into dest in a single tf_read() call; jumps to the
+ * enclosing function's `fail` label on a short read.  Needs `tf` in scope.
+ */
+#define READ_WAVE_SAMPLE(dest, b, s) \
+    if (tf_read(dest, (b) * (s), 1, tf) != 1) \
+        goto fail
+/* Same, but counted in frames: f frames of `channels` samples of b bytes. */
+#define READ_WAVE_FRAME(dest, b, f) \
+    READ_WAVE_SAMPLE(dest, b, (f) * channels)
+
+/*
+ * Block-wise frame reader.  BLOCK_READ_BEGIN opens a scope that declares a
+ * local staging buffer `data` and an index `j`, then:
+ *   - starts with the largest frame count that fits in `data` and halves
+ *     it each round until it reaches 0,
+ *   - for each block size, keeps reading whole blocks while at least that
+ *     many frames remain (i <= frames - block_frame_count),
+ *   - after each read runs the statement that follows the macro once per
+ *     frame (i is incremented per iteration); that statement must advance
+ *     `j` by the number of samples it consumed.
+ * The enclosing function must provide `i`, `frames`, `block_frame_count`,
+ * `channels`, `tf` and a `fail` label.  BLOCK_READ_END closes the scopes.
+ */
+#define BLOCK_READ_BEGIN(stype, sbyte, fch) /* sbyte may be sizeof(stype) */ \
+    { \
+        stype data[WAVE_BUF_SIZE / sizeof(stype)]; \
+        int   j; \
+        for (block_frame_count = (sizeof data / sbyte / fch); block_frame_count != 0; block_frame_count >>= 1) { \
+            while (i <= frames - block_frame_count) { \
+                READ_WAVE_FRAME(data, sbyte, block_frame_count); \
+                for (j = 0; j < (block_frame_count * (fch)); i++)
+#define BLOCK_READ_END \
+    } } }
+
+/* Variant of BLOCK_READ_BEGIN for packed 3-byte samples (byte staging buffer). */
+#define BLOCK_READ3_BEGIN(fch) \
+    { \
+        uint8 data[WAVE_BUF_SIZE * 3]; \
+        int   j; \
+        for (block_frame_count = (sizeof data / 3 / fch); block_frame_count != 0; block_frame_count >>= 1) { \
+            while (i <= frames - block_frame_count) { \
+                READ_WAVE_FRAME(data, 3, block_frame_count); \
+                for (j = 0; j < (block_frame_count * (fch)); i++)
+#define BLOCK_READ3_END \
+    } } }
+
+/*
+ * Read `frames` interleaved frames from tf and de-interleave them into
+ * sdata[0..channels-1][0..frames-1] as floats.
+ *
+ * flags    : SAMPLE_BIG_ENDIAN selects big-endian integer samples;
+ *            SAMPLE_IEEE_FLOAT (with bits == 32) selects raw float samples
+ * bits     : 16, 24 or 32; anything else fails
+ * channels : number of interleaved channels in the file
+ * frames   : number of frames to read
+ *
+ * Integer PCM is scaled by DIV_15BIT (16-bit) or DIV_31BIT (24/32-bit, the
+ * 24-bit value being placed in the top three bytes).
+ *
+ * Integer samples are staged in unsigned buffers, so they are cast to the
+ * signed type of the same width before the float conversion; this keeps
+ * negative samples negative regardless of how the LE_xxx()/BE_xxx() macros
+ * are defined.
+ *
+ * Returns 1 on success; on a short read or unsupported width a warning is
+ * logged and 0 is returned.
+ */
+static int read_sample_data(int32 flags, struct timidity_file *tf, int bits, int channels, int32 frames, float **sdata)
+{
+       int i, block_frame_count;
+
+       i = 0;
+       if (bits == 32 && (flags & SAMPLE_IEEE_FLOAT)) {
+               BLOCK_READ_BEGIN(float, 4, channels)
+               {
+                       int c;
+                       for (c = 0; c < channels; c++, j++)
+                               sdata[c][i] = data[j];
+               }
+               BLOCK_READ_END
+       }
+       else if (bits == 32)
+       {
+               if (flags & SAMPLE_BIG_ENDIAN) {
+                       BLOCK_READ_BEGIN(uint32, 4, channels)
+                       {
+                               int c;
+                               for (c = 0; c < channels; c++, j++)
+                                       sdata[c][i] = (float)(int32)BE_LONG(data[j]) * DIV_31BIT;
+                       }
+                       BLOCK_READ_END
+               } else {
+                       BLOCK_READ_BEGIN(uint32, 4, channels)
+                       {
+                               int c;
+                               for (c = 0; c < channels; c++, j++)
+                                       sdata[c][i] = (float)(int32)LE_LONG(data[j]) * DIV_31BIT;
+                       }
+                       BLOCK_READ_END
+               }
+       }
+       else if (bits == 24)
+       {
+               if (flags & SAMPLE_BIG_ENDIAN) {
+                       BLOCK_READ3_BEGIN(channels)
+                       {
+                               uint8 *ptr = (uint8*) &data + 3 * j;
+                               int c;
+                               for (c = 0; c < channels; c++, j++) {
+                                       /* widen before shifting so the top byte never overflows a signed int */
+                                       uint32 d = ((uint32)ptr[0] << 24) | ((uint32)ptr[1] << 16) | ((uint32)ptr[2] << 8);
+                                       sdata[c][i] = (float)(int32)d * DIV_31BIT;
+                                       ptr += 3;
+                               }
+                       }
+                       BLOCK_READ3_END
+               } else {
+                       BLOCK_READ3_BEGIN(channels)
+                       {
+                               uint8 *ptr = (uint8*) &data + 3 * j;
+                               int c;
+                               for (c = 0; c < channels; c++, j++) {
+                                       uint32 d = ((uint32)ptr[2] << 24) | ((uint32)ptr[1] << 16) | ((uint32)ptr[0] << 8);
+                                       sdata[c][i] = (float)(int32)d * DIV_31BIT;
+                                       ptr += 3;
+                               }
+                       }
+                       BLOCK_READ3_END
+               }
+       }
+       else if (bits == 16)
+       {
+               if (flags & SAMPLE_BIG_ENDIAN) {
+                       BLOCK_READ_BEGIN(uint16, 2, channels)
+                       {
+                               int c;
+                               for (c = 0; c < channels; c++, j++)
+                                       sdata[c][i] = (float)(int16)BE_SHORT(data[j]) * DIV_15BIT;
+                       }
+                       BLOCK_READ_END
+               } else {
+                       BLOCK_READ_BEGIN(uint16, 2, channels)
+                       {
+                               int c;
+                               for (c = 0; c < channels; c++, j++)
+                                       sdata[c][i] = (float)(int16)LE_SHORT(data[j]) * DIV_15BIT;
+                       }
+                       BLOCK_READ_END
+               }
+       }
+       else
+               goto fail;
+       return 1;
+       fail:
+               ctl->cmsg(CMSG_WARNING, VERB_VERBOSE, "Unable to read sample data");
+       return 0;
+}
+
+static int import_wave_load(char *sample_file, InfoReverbEX2 *info)
+{
+       struct timidity_file *tf;
+       union {
+               int32 i[3];
+               char  c[12];
+       } xbuf;
+       char                  *buf = xbuf.c;
+       int                   state;            /* initial > fmt_read > data_read */
+       int                   i, chunk_size, type_index, type_size, samples = 0;
+       int32                 chunk_flags;
+       Sample                *sample;
+       WAVFormatChunk        format = { 0, 0, 0, 0, 0, 0 };
+
+       if ((tf = open_file(sample_file, 1, OF_NORMAL)) == NULL)
+               return 1;
+       if (tf_read(buf, 12, 1, tf) != 1
+                       || memcmp(&buf[0], "RIFF", 4) != 0 || memcmp(&buf[8], "WAVE", 4) != 0)
+       {
+               close_file(tf);
+               return 1;
+       }
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       if(info->irdata[0] != NULL){
+               aligned_free(info->irdata[0]);
+               info->irdata[0] = NULL;
+       }
+       if(info->irdata[1] != NULL){
+               aligned_free(info->irdata[1]);
+               info->irdata[1] = NULL;
+       }
+#else
+       if(info->irdata[0] != NULL){
+               safe_free(info->irdata[0]);
+               info->irdata[0] = NULL;
+       }
+       if(info->irdata[1] != NULL){
+               safe_free(info->irdata[1]);
+               info->irdata[1] = NULL;
+       }
+#endif
+       ctl->cmsg(CMSG_INFO, VERB_NOISY, "Loading IR WAV: %s", sample_file);
+       state = chunk_flags = 0;
+       type_index = 4, type_size = 8;
+       for (;;) {
+               if (tf_read(&buf[type_index], type_size, 1, tf) != 1)
+                       break;
+               chunk_size = LE_LONG(xbuf.i[2]);
+               if (memcmp(&buf[4 + 0], "fmt ", 4) == 0)
+               {
+                       if (state != 0                                  /* only one format chunk is required */
+                                       || chunk_size < 0x10)   /* too small */
+                               break;
+                       if (!read_WAVFormatChunk(tf, &format, chunk_size))
+                               break;
+                       if (format.wChannels != 2 /* compressed */
+                                       || !(format.wFormatTag == WAVE_FORMAT_PCM
+                                               || format.wFormatTag == WAVE_FORMAT_IEEE_FLOAT)         /* compressed */
+                                       || format.wBitsPerSample & 0x7  /* padding not supported */
+                                       || format.wBitsPerSample < 16   /* less than 16-bit is not supported */
+                                       || format.wBitsPerSample > 32)  /* more than 32-bit is not supported */
+                               break;
+                       state++;
+               }
+               else if (memcmp(&buf[4 + 0], "data", 4) == 0)
+               {
+                       const int32 frames = chunk_size / format.wBlockAlign, bits = format.wBitsPerSample, 
+                               fflg = format.wFormatTag == WAVE_FORMAT_IEEE_FLOAT ? 1 : 0;
+                       float *sdata[2];
+                       int sflg;
+                       int32 bytes;
+
+                       if (state != 1)
+                               break;
+                       samples = format.wChannels;
+                       info->srate = format.dwSamplesPerSec;
+                       info->frame = frames + 8; //  + 8 for simd thread
+                       bytes = (frames + 16) * sizeof(float); //  + 16 for simd thread
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+                       info->irdata[0] = sdata[0] = (float *) aligned_malloc(bytes, ALIGN_SIZE);
+                       info->irdata[1] = sdata[1] = (float *) aligned_malloc(bytes, ALIGN_SIZE);
+#else
+                       info->irdata[0] = sdata[0] = (float *) safe_large_malloc(bytes);
+                       info->irdata[1] = sdata[1] = (float *) safe_large_malloc(bytes);
+#endif
+                       memset(&info->irdata[0][frames], 0, sizeof(float) * 16);
+                       memset(&info->irdata[1][frames], 0, sizeof(float) * 16);
+                       sflg = fflg ? (SAMPLE_8BIT_UNSIGNED|SAMPLE_IEEE_FLOAT) : SAMPLE_8BIT_UNSIGNED;
+                       if (!read_sample_data(sflg,     tf, bits, samples, frames, sdata))
+                               break;
+                       state++;
+               }
+               else if (tf_seek(tf, chunk_size, SEEK_CUR) == -1)
+                       break;
+               type_index = 4 - (chunk_size & 1);
+               type_size = 8 + (chunk_size & 1);
+       }
+       close_file(tf);
+       return (state != 2);
+}
+
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)        
+static void do_reverb_ex2_thread(int thread_num, void *info2);
+#define REV_IR_4T
+// REV_IR_4T undefined: 2 threads, work split by L/R channel
+// REV_IR_4T defined:   4 threads, L/R split plus the input buffer split into front/back halves
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)  
+
+/*
+ * Release every buffer owned by the impulse-response reverb (impulse
+ * response, history buffers and, in the multi-threaded build, the per-thread
+ * staging buffers), detach the worker callback and mark the effect as
+ * uninitialized.  Pointers are reset to NULL so a repeated call is harmless.
+ */
+void free_reverb_ex2(InfoReverbEX2 *info)
+{
+/* free with the allocator that matches the build, then forget the pointer */
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+#define REV_EX2_RELEASE(p) do { if ((p) != NULL) { aligned_free(p); (p) = NULL; } } while (0)
+#else
+#define REV_EX2_RELEASE(p) do { if ((p) != NULL) { safe_free(p); (p) = NULL; } } while (0)
+#endif
+       int ch, slot;
+
+       for (ch = 0; ch < 2; ch++) {
+               REV_EX2_RELEASE(info->irdata[ch]);
+               REV_EX2_RELEASE(info->buf[ch]);
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+#if defined(REV_IR_4T)
+               REV_EX2_RELEASE(info->buf2[ch]);
+#endif // defined(REV_IR_4T)
+               /* all four staging buffers are visited on each pass; the second pass finds NULLs */
+               for (slot = 0; slot < 4; slot++)
+                       REV_EX2_RELEASE(info->tbuf[slot]);
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       }
+
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       reset_effect_sub_thread(do_reverb_ex2_thread, info);
+       info->thread = 0;
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       info->init = 0;
+#undef REV_EX2_RELEASE
+}
+
+/*
+ * Prepare the impulse-response reverb:
+ *   1. load "irfile.wav" through import_wave_load(),
+ *   2. resample it to play_mode->rate when the rates differ,
+ *   3. pre-scale the impulse response by the reverb level,
+ *   4. allocate and clear the input history buffers,
+ *   5. in the multi-threaded build, register the worker callback and
+ *      allocate the per-thread buffers.
+ * info->init is set to 1 on success; it is left at 0 (effect disabled) for
+ * mono output, a failed load, or a non-positive frame count / sample rate.
+ */
+void init_reverb_ex2(InfoReverbEX2 *info)
+{
+       int i, k;
+       float div;
+       int32 bytes;
+       char *sample_file = "irfile.wav";
+
+       if(play_mode->encoding & PE_MONO){
+               info->init = 0;
+               return;
+       }
+       if(import_wave_load(sample_file, info)){
+               info->init = 0;
+               return;
+       }
+       if(info->frame < 1 || info->srate < 1){
+               info->init = 0;
+               return;
+       }
+       // Resample the impulse response to the output rate.  Each output frame
+       // is the average of two linearly interpolated reads taken half an
+       // output step apart (2x oversampling).
+       if(info->srate != play_mode->rate){
+               double ratio = (double)play_mode->rate / (double)info->srate;
+               int32 nframe = (double)info->frame * ratio + 0.5;
+               int32 nbytes = (nframe + 4) * sizeof(float);
+               double rcount = 0.0, rrate = DIV_2 / ratio; // DIV_2:OVx2 
+               float *ndata[2];
+               
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               ndata[0] = (float *) aligned_malloc(nbytes, ALIGN_SIZE);
+               ndata[1] = (float *) aligned_malloc(nbytes, ALIGN_SIZE);
+#else
+               ndata[0] = (float *) safe_large_malloc(nbytes);
+               ndata[1] = (float *) safe_large_malloc(nbytes);
+#endif
+               memset(ndata[0], 0, nbytes);
+               memset(ndata[1], 0, nbytes);            
+               // NOTE(review): rcount is advanced once inside the L section and once
+               // inside the R section, so R is read at (rcount + rrate, rcount + 2*rrate)
+               // while L is read at (rcount, rcount + rrate); the right channel ends up
+               // half an output sample later than the left.  Confirm whether that is intended.
+               for (i = 0; i < nframe; i++){
+                       int32 index;
+                       double v1, v2, fp;
+                       double tmp1, tmp2;
+                       // L
+                       index = (int32)rcount;
+                       v1 = info->irdata[0][index];
+                       v2 = info->irdata[0][index + 1];
+                       fp = rcount - floor(rcount);
+                       tmp1 = v1 + (v2 - v1) * fp;                     
+                       rcount += rrate;
+                       index = (int32)rcount;
+                       v1 = info->irdata[0][index];
+                       v2 = info->irdata[0][index + 1];
+                       fp = rcount - floor(rcount);
+                       tmp2 = v1 + (v2 - v1) * fp;     
+                       ndata[0][i] = (tmp1 + tmp2) * DIV_2;
+                       // R            
+                       index = (int32)rcount;
+                       v1 = info->irdata[1][index];
+                       v2 = info->irdata[1][index + 1];
+                       fp = rcount - floor(rcount);
+                       tmp1 = v1 + (v2 - v1) * fp;                     
+                       rcount += rrate;
+                       index = (int32)rcount;
+                       v1 = info->irdata[1][index];
+                       v2 = info->irdata[1][index + 1];
+                       fp = rcount - floor(rcount);
+                       tmp2 = v1 + (v2 - v1) * fp;     
+                       ndata[1][i] = (tmp1 + tmp2) * DIV_2;
+               }
+               // drop the original-rate data and adopt the resampled buffers
+               for(i = 0; i < 2; i++){
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+                       if(info->irdata[i] != NULL){
+                               aligned_free(info->irdata[i]);
+                               info->irdata[i] = NULL;
+                       }
+#else
+                       if(info->irdata[i] != NULL){
+                               safe_free(info->irdata[i]);
+                               info->irdata[i] = NULL;
+                       }
+#endif         
+               }
+               info->frame = nframe; 
+               info->srate = play_mode->rate;
+               info->irdata[0] = ndata[0];
+               info->irdata[1] = ndata[1];
+       }
+       // Disabled code: pads the impulse response to twice the next power of
+       // two and scales it by 1 / nframe.
+#if 0
+       {
+               int32 b = 1;
+               int32 nframe = 1;
+               int32 nbytes;
+               float fdiv;
+               float *ndata[2];
+
+               for(b = 1; b < 26; b++) {       
+                       nframe = 1 << b;
+                       if(info->frame < nframe)
+                               break;
+               }
+               nframe *= 2;
+               nbytes = nframe * sizeof(float);
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               ndata[0] = (float *) aligned_malloc(nbytes, ALIGN_SIZE);
+               ndata[1] = (float *) aligned_malloc(nbytes, ALIGN_SIZE);
+#else
+               ndata[0] = (float *) safe_large_malloc(nbytes);
+               ndata[1] = (float *) safe_large_malloc(nbytes);
+#endif
+               memset(ndata[0], 0, nbytes);
+               memset(ndata[1], 0, nbytes);
+               fdiv = 1.0 / nframe;
+               for (i = 0; i < info->frame; i++){
+                       ndata[0][i] = info->irdata[0][i] * fdiv;
+                       ndata[1][i] = info->irdata[1][i] * fdiv;
+               }
+               
+               for(i = 0; i < 2; i++){
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+                       if(info->irdata[i] != NULL){
+                               aligned_free(info->irdata[i]);
+                               info->irdata[i] = NULL;
+                       }
+#else
+                       if(info->irdata[i] != NULL){
+                               safe_free(info->irdata[i]);
+                               info->irdata[i] = NULL;
+                       }
+#endif         
+               }
+               info->frame = nframe;
+               info->div_frame = fdiv;
+               info->srate = play_mode->rate;
+               info->irdata[0] = ndata[0];
+               info->irdata[1] = ndata[1];
+       }
+#endif
+
+
+       // Fold the output gain into the impulse response once, so the
+       // convolution loop needs no extra multiply per sample.
+       div = 0.125 * 48000.0 / (double)play_mode->rate * info->level * ext_reverb_ex_level;    
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       {
+               __m128 vdiv = _mm_set1_ps(div);
+               // steps by 4, so up to 3 floats past info->frame are touched (padding)
+               for (i = 0; i < info->frame; i += 4){
+                       MM_LS_MUL_PS(&info->irdata[0][i], vdiv);
+                       MM_LS_MUL_PS(&info->irdata[1][i], vdiv);
+               }
+       }
+#else
+       for (i = 0; i < info->frame; i++){
+               info->irdata[0][i] *= div;
+               info->irdata[1][i] *= div;
+       }
+#endif
+       // Input history: two back-to-back copies of (frame + 4) floats per
+       // channel, cleared to silence.
+       bytes = (info->frame + 4) * 2 * sizeof(float);
+       for(i = 0; i < 2; i++){
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               if(info->buf[i] != NULL){
+                       aligned_free(info->buf[i]);
+                       info->buf[i] = NULL;
+               }
+               info->buf[i] = (float *) aligned_malloc(bytes, ALIGN_SIZE);
+#else
+               if(info->buf[i] != NULL){
+                       safe_free(info->buf[i]);
+                       info->buf[i] = NULL;
+               }
+               info->buf[i] = (float *) safe_large_malloc(bytes);
+#endif
+               memset(info->buf[i], 0, bytes);
+       }
+       info->wcount = 0;
+
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)        
+       // A nonzero return from set_effect_sub_thread() is treated as "run
+       // without worker threads": the effect is still marked initialized.
+       if(set_effect_sub_thread(do_reverb_ex2_thread, info, 4)){
+               info->thread = 0;
+               goto thru_thread;
+       }       
+       bytes = (info->frame + 4) * 2 * sizeof(float);
+
+#if defined(REV_IR_4T) 
+       // second set of history buffers, used by worker threads 2 and 3
+       for(i = 0; i < 2; i++){
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               if(info->buf2[i] != NULL){
+                       aligned_free(info->buf2[i]);
+                       info->buf2[i] = NULL;
+               }
+               info->buf2[i] = (float *) aligned_malloc(bytes, ALIGN_SIZE);
+#else
+               if(info->buf2[i] != NULL){
+                       safe_free(info->buf2[i]);
+                       info->buf2[i] = NULL;
+               }
+               info->buf2[i] = (float *) safe_large_malloc(bytes);
+#endif
+               memset(info->buf2[i], 0, bytes);
+       }
+#endif // defined(REV_IR_4T)
+
+       // staging buffers shared with the workers: tbuf[0]/tbuf[1] carry the
+       // L/R input, tbuf[2]/tbuf[3] receive the L/R output
+       bytes = compute_buffer_size * sizeof(float);
+       for(k = 0; k < 4; k++){
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               if(info->tbuf[k] != NULL){
+                       aligned_free(info->tbuf[k]);
+                       info->tbuf[k] = NULL;
+               }
+               info->tbuf[k] = (float *) aligned_malloc(bytes, ALIGN_SIZE);
+#else
+               if(info->tbuf[k] != NULL){
+                       safe_free(info->tbuf[k]);
+                       info->tbuf[k] = NULL;
+               }
+               info->tbuf[k] = (float *) safe_large_malloc(bytes);
+#endif
+               memset(info->tbuf[k], 0, bytes);
+       }
+       info->twcount[0] = 0;
+       info->twcount[1] = 0;
+       info->twcount[2] = 0;
+       info->twcount[3] = 0;
+       info->tcount = 0;
+       info->thread = 1;
+thru_thread:
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)  
+       info->init = 1;
+}
+
+
+#if defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)                
+/*
+ * Worker callback: direct-form convolution of one channel's staged input
+ * with its impulse response.
+ *
+ * thread_num : selects channel and range.  0 = L, 1 = R; with REV_IR_4T,
+ *              threads 0/1 produce the first half of the block
+ *              ([0, tcount/2)) and threads 2/3 the second half, using
+ *              their own history buffers (info->buf2).
+ * info2      : the InfoReverbEX2 instance (passed as void *).
+ *
+ * Input is taken from info->tbuf[0] (L) / tbuf[1] (R), output is written to
+ * info->tbuf[2] (L) / tbuf[3] (R).  Every thread pushes ALL tcount input
+ * samples into its own history buffer, but only computes output for its own
+ * [scount, ecount) range.  Each sample is stored twice, at w and
+ * w + info->frame, so `frame` consecutive reads starting at w never have to
+ * wrap.
+ */
+static void do_reverb_ex2_thread(int thread_num, void *info2)
+{
+       InfoReverbEX2 *info;
+       int i, k;
+       float *buf, *ibuf, *obuf, *irdata;
+       int32 *wcount;
+       int32 scount, ecount;
+               
+#if defined(REV_IR_4T)
+       if(thread_num >= 4)
+#else
+       if(thread_num >= 2)
+#endif
+               return;
+       if(!info2)
+               return;
+       info = (InfoReverbEX2 *)info2;
+       if(!info->init)
+               return; 
+       if(thread_num == 0){ // L
+               ibuf = info->tbuf[0];
+               obuf = info->tbuf[2];
+               buf = info->buf[0];
+               irdata = info->irdata[0];
+               wcount = &info->twcount[0];
+               scount = 0;
+#if defined(REV_IR_4T)
+               ecount = info->tcount >> 1;
+#else
+               ecount = info->tcount;
+#endif
+       }else if(thread_num == 1){ // R
+               ibuf = info->tbuf[1];
+               obuf = info->tbuf[3];
+               buf = info->buf[1];
+               irdata = info->irdata[1];
+               wcount = &info->twcount[1];
+               scount = 0;
+#if defined(REV_IR_4T)
+               ecount = info->tcount >> 1;
+#else
+               ecount = info->tcount;
+#endif
+       }else
+#if defined(REV_IR_4T)
+       if(thread_num == 2){ // L, second half of the block
+               ibuf = info->tbuf[0];
+               obuf = info->tbuf[2];
+               buf = info->buf2[0];
+               irdata = info->irdata[0];
+               wcount = &info->twcount[2];
+               scount = info->tcount >> 1;
+               ecount = info->tcount;
+       }else if(thread_num == 3){ // R, second half of the block
+               ibuf = info->tbuf[1];
+               obuf = info->tbuf[3];
+               buf = info->buf2[1];
+               irdata = info->irdata[1];
+               wcount = &info->twcount[3];
+               scount = info->tcount >> 1;
+               ecount = info->tcount;
+       }else
+#endif
+               return; 
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+       // 4set/1ch : four consecutive output samples of one channel per iteration
+       {
+       __m128i vwc = _mm_set1_epi32(*wcount);
+       __m128i vframe = _mm_set1_epi32(info->frame);
+       __m128i vi4 = _mm_set1_epi32(4), vi0 = _mm_setzero_si128(), viset = _mm_set_epi32(3, 2, 1, 0);
+       __m128i vwcp;
+
+       // lane n holds the write index for sample i + n: wcount - n, wrapped into [0, frame)
+       vwc = _mm_sub_epi32(vwc, viset);
+       vwc = _mm_add_epi32(vwc, _mm_and_si128(vframe, _mm_cmplt_epi32(vwc, vi0)));
+#if defined(REV_IR_4T)
+       // samples before this thread's range: record them in the history only
+       for (i = 0; i < scount; i += 4){
+               vwcp = _mm_add_epi32(vwc, vframe);
+               buf[MM_EXTRACT_I32(vwcp,0)] = buf[MM_EXTRACT_I32(vwc,0)] = ibuf[i];
+               buf[MM_EXTRACT_I32(vwcp,1)] = buf[MM_EXTRACT_I32(vwc,1)] = ibuf[i + 1];
+               buf[MM_EXTRACT_I32(vwcp,2)] = buf[MM_EXTRACT_I32(vwc,2)] = ibuf[i + 2];
+               buf[MM_EXTRACT_I32(vwcp,3)] = buf[MM_EXTRACT_I32(vwc,3)] = ibuf[i + 3];
+               vwc = _mm_sub_epi32(vwc, vi4);
+               vwc = _mm_add_epi32(vwc, _mm_and_si128(vframe, _mm_cmplt_epi32(vwc, vi0)));
+       }
+#endif
+       for (i = scount; i < ecount; i += 4){
+               __m128 sum0 = _mm_setzero_ps();
+               __m128 sum1 = _mm_setzero_ps();
+               __m128 sum2 = _mm_setzero_ps();
+               __m128 sum3 = _mm_setzero_ps();
+               __m128 tmp0, tmp2, tmp1, tmp3;          
+               // store the four new samples (and their mirror copies at +frame)
+               vwcp = _mm_add_epi32(vwc, vframe);
+               buf[MM_EXTRACT_I32(vwcp,0)] = buf[MM_EXTRACT_I32(vwc,0)] = ibuf[i];
+               buf[MM_EXTRACT_I32(vwcp,1)] = buf[MM_EXTRACT_I32(vwc,1)] = ibuf[i + 1];
+               buf[MM_EXTRACT_I32(vwcp,2)] = buf[MM_EXTRACT_I32(vwc,2)] = ibuf[i + 2];
+               buf[MM_EXTRACT_I32(vwcp,3)] = buf[MM_EXTRACT_I32(vwc,3)] = ibuf[i + 3];
+               // The overlap of w0/w1/w2/w3 at the tail of the FMA loop is avoided by appending zeros to the end of irdata beforehand
+               for (k = 0; k < info->frame; k += 4){
+                       __m128 vir = _mm_load_ps(&irdata[k]);
+                       vwcp = _mm_add_epi32(vwc, _mm_set1_epi32(k));
+                       sum0 = MM_FMA_PS(_mm_loadu_ps(&buf[MM_EXTRACT_I32(vwcp,0)]), vir, sum0);
+                       sum1 = MM_FMA_PS(_mm_loadu_ps(&buf[MM_EXTRACT_I32(vwcp,1)]), vir, sum1);
+                       sum2 = MM_FMA_PS(_mm_loadu_ps(&buf[MM_EXTRACT_I32(vwcp,2)]), vir, sum2);
+                       sum3 = MM_FMA_PS(_mm_loadu_ps(&buf[MM_EXTRACT_I32(vwcp,3)]), vir, sum3);
+               }
+               vwc = _mm_sub_epi32(vwc, vi4);
+               vwc = _mm_add_epi32(vwc, _mm_and_si128(vframe, _mm_cmplt_epi32(vwc, vi0)));
+               // Transpose the four accumulators and add them, so that lane n of
+               // the result is the horizontal sum of sum<n> (= output sample i + n).
+               // sum0 v0,v1,v2,v3 // sum1 v4,v5,v6,v7 // sum2 v8,v9,v10,v11 // sum3 v12,v13,v14,v15
+               tmp0 = _mm_shuffle_ps(sum0, sum1, 0x44); // v0,v1,v4,v5
+               tmp2 = _mm_shuffle_ps(sum0, sum1, 0xEE); // v2,v3,v6,v7
+               tmp1 = _mm_shuffle_ps(sum2, sum3, 0x44); // v8,v9,v12,v13
+               tmp3 = _mm_shuffle_ps(sum2, sum3, 0xEE); // v10,v11,v14,v15
+               sum0 = _mm_shuffle_ps(tmp0, tmp1, 0x88); // v0,v4,v8,v12
+               sum1 = _mm_shuffle_ps(tmp0, tmp1, 0xDD); // v1,v5,v9,v13
+               sum2 = _mm_shuffle_ps(tmp2, tmp3, 0x88); // v2,v6,v10,v14
+               sum3 = _mm_shuffle_ps(tmp2, tmp3, 0xDD); // v3,v7,v11,v15
+               sum0 = _mm_add_ps(sum0, sum1);
+               sum2 = _mm_add_ps(sum2, sum3);
+               sum0 = _mm_add_ps(sum0, sum2); // v0123,v4567,v89AB,vCDEF
+               _mm_store_ps(&obuf[i], sum0);
+       }
+#if defined(REV_IR_4T)
+       // samples after this thread's range: record them in the history only
+       for (i = ecount; i < info->tcount; i += 4){
+               vwcp = _mm_add_epi32(vwc, vframe);
+               buf[MM_EXTRACT_I32(vwcp,0)] = buf[MM_EXTRACT_I32(vwc,0)] = ibuf[i];
+               buf[MM_EXTRACT_I32(vwcp,1)] = buf[MM_EXTRACT_I32(vwc,1)] = ibuf[i + 1];
+               buf[MM_EXTRACT_I32(vwcp,2)] = buf[MM_EXTRACT_I32(vwc,2)] = ibuf[i + 2];
+               buf[MM_EXTRACT_I32(vwcp,3)] = buf[MM_EXTRACT_I32(vwc,3)] = ibuf[i + 3];
+               vwc = _mm_sub_epi32(vwc, vi4);
+               vwc = _mm_add_epi32(vwc, _mm_and_si128(vframe, _mm_cmplt_epi32(vwc, vi0)));
+       }       
+#endif
+       // lane 0 is the write position for the next block
+       *wcount = MM_EXTRACT_I32(vwc,0);
+       }
+#else
+       // scalar version of the same three phases
+       {
+#if defined(REV_IR_4T)
+       for (i = 0; i < scount; i++){
+               int32 w0 = *wcount;     
+               --(*wcount);
+               if(*wcount < 0)
+                       *wcount += info->frame;
+               buf[w0 + info->frame] = buf[w0] = ibuf[i];
+       }
+#endif
+       for (i = scount; i < ecount; i++){
+               float sum = 0;
+               int32 w = *wcount;      
+               // the write position moves backwards, so buf[w + k] is the input k samples ago
+               --(*wcount);
+               if(*wcount < 0)
+                       *wcount += info->frame;
+               buf[w] = ibuf[i];
+               buf[w + info->frame] = ibuf[i];
+               for (k = 0; k < info->frame; k++){
+                       int32 r = w + k;
+                       sum += buf[r] * irdata[k];
+               }
+               obuf[i] = sum;
+       }
+#if defined(REV_IR_4T)
+       for (i = ecount; i < info->tcount; i++){
+               int32 w0 = *wcount;     
+               --(*wcount);
+               if(*wcount < 0)
+                       *wcount += info->frame;
+               buf[w0 + info->frame] = buf[w0] = ibuf[i];
+       }
+#endif
+       }
+#endif
+}
+#endif // defined(MULTI_THREAD_COMPUTE2) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)  
+
+
 static void do_reverb_ex2(DATA_T *buf, int32 count, InfoReverbEX2 *info)
 {
-       int i;
+       int32 i, k;
+
        if(count == MAGIC_INIT_EFFECT_INFO) {
                init_reverb_ex2(info);
                return;
        } else if(count == MAGIC_FREE_EFFECT_INFO) {
-               free_reverb_ex(info);
+               free_reverb_ex2(info);
                return;
-       } if(count < 0)
+       } else if(count < 0)
                return; 
-       info->do_reverb_mode(buf, count, info);
+       else if(!info->init)
+               return;
+       if(info->thread){
+               info->tcount = count >> 1;
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               for (i = 0, k = 0; i < count; i += 8, k += 4){  
+                       __m128 tmp0, tmp1, tmp2, tmp3;
+                       // in
+                       tmp0 = _mm_cvtpd_ps(_mm_load_pd(&buf[i]));
+                       tmp1 = _mm_cvtpd_ps(_mm_load_pd(&buf[i + 2]));
+                       tmp2 = _mm_cvtpd_ps(_mm_load_pd(&buf[i + 4]));
+                       tmp3 = _mm_cvtpd_ps(_mm_load_pd(&buf[i + 6]));
+                       tmp0 = _mm_shuffle_ps(tmp0, tmp1, 0x44);  
+                       tmp2 = _mm_shuffle_ps(tmp2, tmp3, 0x44);                        
+                       _mm_store_ps(&info->tbuf[0][k], _mm_shuffle_ps(tmp0, tmp2, 0x88));
+                       _mm_store_ps(&info->tbuf[1][k], _mm_shuffle_ps(tmp0, tmp2, 0xdd));
+               }
+#else
+               for (i = 0, k = 0; i < count; i++, k++){                        
+                       info->tbuf[0][k] = buf[i];
+                       i++;
+                       info->tbuf[1][k] = buf[i];
+                       k++;
+               }
+#endif
+#if defined(REV_IR_4T)
+               go_effect_sub_thread(do_reverb_ex2_thread, info, 4);
+#else
+               go_effect_sub_thread(do_reverb_ex2_thread, info, 2);
+#endif         
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               for (i = 0, k = 0; i < count; i += 8, k += 4){  
+                       __m128 tmp0, tmp1, tmp2, tmp3;
+                       // out
+                       tmp1 = _mm_load_ps(&info->tbuf[2][k]);
+                       tmp3 = _mm_load_ps(&info->tbuf[3][k]);
+                       tmp0 = _mm_unpacklo_ps(tmp1, tmp3);
+                       tmp2 = _mm_unpackhi_ps(tmp1, tmp3);
+                       _mm_store_pd(&buf[i], _mm_cvtps_pd(tmp0));
+                       _mm_store_pd(&buf[i + 2], _mm_cvtps_pd(_mm_shuffle_ps(tmp0, tmp0, 0x4e)));
+                       _mm_store_pd(&buf[i + 4], _mm_cvtps_pd(tmp2));
+                       _mm_store_pd(&buf[i + 6], _mm_cvtps_pd(_mm_shuffle_ps(tmp2, tmp2, 0x4e)));
+               }
+#else
+               for (i = 0, k = 0; i < count; i++, k++){        
+                       buf[i] = info->tbuf[2][k];      
+                       i++;
+                       buf[i] = info->tbuf[3][k];
+               }
+#endif
+               return;
+       }
+
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+// 2set/1ch
+       for (i = 0; i < count; i += 4){
+               __m128 sumL1 = _mm_setzero_ps();
+               __m128 sumR1 = _mm_setzero_ps();
+               __m128 sumL2 = _mm_setzero_ps();
+               __m128 sumR2 = _mm_setzero_ps();
+               __m128 tmp0, tmp2, tmp1, tmp3, vir0, vir1; 
+               __m128 sum0, sum1, sum2, sum3;
+               int32 w1, w2;
+
+               w1 = info->wcount;
+               --info->wcount;
+               if(info->wcount < 0)
+                       info->wcount += info->frame;
+               w2 = info->wcount;
+               --info->wcount;
+               if(info->wcount < 0)
+                       info->wcount += info->frame;
+               info->buf[0][w1] = buf[i];
+               info->buf[1][w1] = buf[i + 1];
+               info->buf[0][w1 + info->frame] = buf[i];
+               info->buf[1][w1 + info->frame] = buf[i + 1];
+               info->buf[0][w2] = buf[i + 2];
+               info->buf[1][w2] = buf[i + 3];
+               info->buf[0][w2 + info->frame] = buf[i + 2];
+               info->buf[1][w2 + info->frame] = buf[i + 3];
+               // FMA\83\8b\81[\83v\82Ì\8dÅ\8cã\82Ìw1w2\8fd\95¡\95\94\95ª\82Í\8e\96\91O\82Éirdata\8dÅ\8cã\82É0\82ð\92Ç\89Á\82·\82é\82±\82Æ\82Å\89ñ\94ð
+               for (k = 0; k < info->frame; k += 4){
+                       int32 r1 = w1 + k;
+                       int32 r2 = w2 + k;
+                       __m128 vir0 = _mm_load_ps(&info->irdata[0][k]);
+                       __m128 vir1 = _mm_load_ps(&info->irdata[1][k]);
+                       sumL1 = MM_FMA_PS(_mm_loadu_ps(&info->buf[0][r1]), vir0, sumL1);
+                       sumR1 = MM_FMA_PS(_mm_loadu_ps(&info->buf[1][r1]), vir1, sumR1);
+                       sumL2 = MM_FMA_PS(_mm_loadu_ps(&info->buf[0][r2]), vir0, sumL2);
+                       sumR2 = MM_FMA_PS(_mm_loadu_ps(&info->buf[1][r2]), vir1, sumR2);
+               }
+               // sumL1 v0,v1,v2,v3 // sumR1 v4,v5,v6,v7 // sumL2 v8,v9,v10,v11 // sumR3 v12,v13,v14,v15
+               tmp0 = _mm_shuffle_ps(sumL1, sumR1, 0x44); // v0,v1,v4,v5
+               tmp2 = _mm_shuffle_ps(sumL1, sumR1, 0xEE); // v2,v3,v6,v7
+               tmp1 = _mm_shuffle_ps(sumL2, sumR2, 0x44); // v8,v9,v12,v13
+               tmp3 = _mm_shuffle_ps(sumL2, sumR2, 0xEE); // v10,v11,v14,v15
+               sum0 = _mm_shuffle_ps(tmp0, tmp1, 0x88); // v0,v4,v8,v12
+               sum1 = _mm_shuffle_ps(tmp0, tmp1, 0xDD); // v1,v5,v9,v13
+               sum2 = _mm_shuffle_ps(tmp2, tmp3, 0x88); // v2,v6,v10,v14
+               sum3 = _mm_shuffle_ps(tmp2, tmp3, 0xDD); // v3,v7,v11,v15
+               sum0 = _mm_add_ps(sum0, sum1);
+               sum2 = _mm_add_ps(sum2, sum3);
+               sum0 = _mm_add_ps(sum0, sum2); // v0123,v4567,v89AB,vCDEF
+               sum1 = _mm_shuffle_ps(sum0, sum0, 0x4e);
+               _mm_store_pd(&buf[i], _mm_cvtps_pd(sum0));
+               _mm_store_pd(&buf[i + 2], _mm_cvtps_pd(sum1));
+       }
+#else
+       for (i = 0; i < count; i++){
+               float sumL = 0, sumR = 0;
+               int32 w = info->wcount; 
+               --info->wcount;
+               if(info->wcount < 0)
+                       info->wcount += info->frame;
+               info->buf[0][w] = buf[i];
+               info->buf[1][w] = buf[i + 1];
+               info->buf[0][w + info->frame] = buf[i];
+               info->buf[1][w + info->frame] = buf[i + 1];
+               for (k = 0; k < info->frame; k++){
+                       int32 r = w + k;
+                       sumL += info->buf[0][r] * info->irdata[0][k];
+                       sumR += info->buf[1][r] * info->irdata[1][k];
+               }
+               buf[i] = sumL;          
+               i++;
+               buf[i] = sumR;  
+       }
+#endif
 }
+
 #endif
 
 
@@ -12516,296 +11874,119 @@ static void do_enhancer_multi(DATA_T *buf, int32 count, InfoEnhancer *info)
 }
 
 
-#define HUMANIZER_LEVEL (0.75)
-#define HUMANIZER_CUTOFF (1500.0)
-#define HUMANIZER_RESO (6.0)
-#define HUMANIZER_LFO_RATE (0.125)
-#define HUMANIZER_LFO_DEPTH (2.0)
+#define HUMANIZER_LEVEL (2.5)
+#define HUMANIZER_CUTOFF (1000)
+#define HUMANIZER_RESO (48)
 
-const double vowel_coeff[5][11] =
+const FLOAT_T humanizer_cf[5][HUMANIZER_PHASE] = 
 {
-       { 8.11044e-06, 8.943665402, -36.83889529, 92.01697887, -154.337906, 181.6233289,
-               -151.8651235, 89.09614114, -35.10298511, 8.388101016, -0.923313471 },  ///A
-       { 3.33819e-06, 8.893102966, -36.49532826, 90.96543286, -152.4545478, 179.4835618,
-               -150.315433, 88.43409371, -34.98612086, 8.407803364, -0.932568035 },  ///I
-       { 4.09431e-07, 8.997322763, -37.20218544, 93.11385476, -156.2530937, 183.7080141,
-               -153.2631681, 89.59539726, -35.12454591, 8.338655623, -0.910251753 },  ///U
-       { 4.36215e-06, 8.90438318, -36.55179099, 91.05750846, -152.422234, 179.1170248,
-               -149.6496211, 87.78352223, -34.60687431, 8.282228154, -0.914150747 },  ///E
-       { 1.13572e-06, 8.994734087, -37.2084849, 93.22900521, -156.6929844, 184.596544,
-               -154.3755513, 90.49663749, -35.58964535, 8.478996281, -0.929252233 },   ///O
+       {0.75, 0.81, 0.93, 0.96, 2.27, 2.54, 3.07, 4.42, 6.05, 7.04 }, // a
+       {0.26, 0.28, 0.30, 2.67, 2.92, 3.36, 3.52, 3.94, 4.48, 6.24 }, // i
+       {0.13, 0.15, 0.18, 0.20, 0.86, 0.88, 0.90, 0.92, 1.17, 2.95 }, // u
+       {0.48, 0.50, 0.60, 0.62, 1.91, 2.19, 2.48, 3.58, 4.28, 6.28 }, // e
+       {0.43, 0.44, 0.48, 0.49, 0.51, 0.57, 0.61, 0.67, 0.78, 1.76 }, // o
 };
 
-#if (OPT_MODE == 1) && !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT) /* fixed-point implementation */
-static void do_humanizer(DATA_T *buf, int32 count, InfoHumanizer *info)
+const FLOAT_T humanizer_cf2[5] = 
 {
-       int i, k, cnt = count / 2;
-       FLOAT_T *dc = info->dc, *vc = info->vc;
-       FLOAT_T *db = info->db;
+       1.50, // a
+       1.09, // i
+       0.35, // u
+       1.11, // e
+       7.76, // o
+};
 
-       if(count == MAGIC_INIT_EFFECT_INFO) {           
-               if(!info->init){
-                       FLOAT_T dclevel = 0.0;
-                       FLOAT_T div_phase = 1.0 / (FLOAT_T)HUMANIZER_PHASE;
-                       info->init = 1;
-                       info->p_vowel = info->vowel;
-                       for(i = 0; i < 11; i++){
-                               info->dc[i] = info->vc[i] = vowel_coeff[info->vowel][i];
-                               dclevel += dc[i];
-                       }
-                       info->fmt_level = (dclevel > 1.0) ? (1.0 / dclevel) : 1.0;
-                       info->dc[11] = 0;
-                       memset(info->db, 0, sizeof(info->db));
-                       info->p_accel = info->accel;
-                       init_envelope3(&info->env, 1.0, info->accel * playmode_rate_ms);
-                       info->p_ac = info->env.vol;
-                       // lfo
-                       info->lfo_rate = HUMANIZER_LFO_RATE * div_playmode_rate * M_PI2;
-                       info->lfo_count = 0; // min point
-                       for(i = 0; i < HUMANIZER_PHASE; i++){
-                               info->lfo_phase[i] = (FLOAT_T)i * div_phase * M_PI2;
-                               init_sample_filter(&info->fc[i], HUMANIZER_CUTOFF, HUMANIZER_RESO, i & 0x1 ? FILTER_BCF12_3 : FILTER_BPF12_3);
-                       }
-                       info->flt_level = pow((FLOAT_T)div_phase, 0.666666666);
-               }
-               if(info->od_sw)
-                       init_drive(&info->drv, 4, 0.2, 0.7, info->drive * otd.efx_CustomHmnLvIn);
-               else
-                       init_drive(&info->drv, 0, 0.0, 0.7, 1.0);
-               info->leveld = info->flt_level * HUMANIZER_LEVEL * otd.efx_CustomHmnLvOut;
-               info->leveli = TIM_FSCALE(info->leveld, 24);
-               if(info->p_accel != info->accel){
-                       info->p_accel = info->accel;
-                       reset_envelope3(&info->env, 1.0, info->accel * playmode_rate_ms);
-               }
-               if(info->p_vowel != info->vowel){
-                       info->p_vowel = info->vowel;
-                       for(i = 0; i < 11; i++)
-                               info->vc[i] = info->dc[i];
-                       init_envelope3(&info->env, 0.0, info->accel * playmode_rate_ms);
-                       reset_envelope3(&info->env, 1.0, ENVELOPE_KEEP);
-                       info->p_ac = -1;
-               }                       
-               return;
-       } else if(count == MAGIC_FREE_EFFECT_INFO) {
-               info->init = 0;
-               return;
-       } else if(count == 0) {
-               return;
-       }       
-       // vowel interpolation
-       compute_envelope3(&info->env, cnt);
-       if(info->p_ac != info->env.vol){
-               FLOAT_T dclevel = 0.0;
-               int vn = info->vowel;
-               double ac1 = info->p_ac = info->env.vol;
-               double ac2 = 1.0 - ac1;
-               for(i = 0; i < 11; i++){
-                       dc[i] = vowel_coeff[vn][i];
-               //      dc[i] = vowel_coeff[vn][i] * ac1 + vc[i] * ac2;
-                       dclevel += dc[i];
-               }
-               info->fmt_level = (dclevel > 1.0) ? (1.0 / dclevel) : 1.0;
-       }       
-       // lfo  
-       for(i = 0; i < HUMANIZER_PHASE; i++){
-               set_sample_filter_freq(&info->fc[i], HUMANIZER_CUTOFF * pow(HUMANIZER_LFO_DEPTH, sin(info->lfo_count + info->lfo_phase[i])));
-               recalc_filter(&info->fc[i]);
-       }
-       if ((info->lfo_count += (info->lfo_rate * cnt)) >= M_PI2) {info->lfo_count -= M_PI2;}
-       // CH_MONO
-       for (i = 0; i < count; i++)
-       {
-               DATA_T tmp = 0, sum = 0;
-               tmp = buf[i];
-               do_drive_mono(&info->drv, &tmp);
-               tmp = tmp * dc[0] + db[1] * dc[1] + db[2] * dc[2] + db[3] * dc[3]
-                       + db[4] * dc[4] + db[5] * dc[5] + db[6] * dc[6] + db[7] * dc[7]
-                       + db[8] * dc[8] + db[9] * dc[9] + db[10] * dc[10];      
-               tmp *= info->fmt_level;
-               db[10] = db[9];
-               db[9] = db[8];
-               db[8] = db[7];
-               db[7] = db[6];
-               db[6] = db[5];
-               db[5] = db[4];
-               db[4] = db[3];
-               db[3] = db[2];
-               db[2] = db[1];                     
-               db[1] = tmp;    
-               for(k = 0; k < HUMANIZER_PHASE; k++){
-                       DATA_T flt = tmp;
-                       sample_filter(&info->fc[k], &flt);
-                       sum += flt;
-               }
-               sum = imuldiv24(sum, info->leveli);
-               buf[i] = sum;
-               i++;
-               buf[i] = sum;
-       }
-}
-#else /* floating-point implementation */
 static void do_humanizer(DATA_T *buf, int32 count, InfoHumanizer *info)
 {
        int i, k, cnt = count / 2;
-       FLOAT_T *dc = info->dc, *vc = info->vc;
-       FLOAT_T *db = info->db;
 
        if(count == MAGIC_INIT_EFFECT_INFO) {           
                if(!info->init){
-                       FLOAT_T dclevel = 0.0;
                        FLOAT_T div_phase = 1.0 / (FLOAT_T)HUMANIZER_PHASE;
                        info->init = 1;
                        info->p_vowel = info->vowel;
-                       for(i = 0; i < 11; i++){
-                               info->dc[i] = info->vc[i] = vowel_coeff[info->vowel][i];
-                               dclevel += dc[i];
-                       }
-                       info->fmt_level = (dclevel > 1.0) ? (1.0 / dclevel) : 1.0;
-                       memset(info->db, 0, sizeof(info->db));
                        info->p_accel = info->accel;
-                       init_envelope3(&info->env, 1.0, info->accel * playmode_rate_ms);
-                       info->p_ac = info->env.vol;
-                       // lfo
-                       info->lfo_rate = HUMANIZER_LFO_RATE * div_playmode_rate * M_PI2;
-                       info->lfo_count = 0; // min point
-                       for(i = 0; i < HUMANIZER_PHASE; i++){
-                               info->lfo_phase[i] = (FLOAT_T)i * div_phase * M_PI2;
-                               init_sample_filter(&info->fc[i], HUMANIZER_CUTOFF, HUMANIZER_RESO, i & 0x1 ? FILTER_BCF12_3 : FILTER_BPF12_3);
+                       for(k = 0; k < HUMANIZER_PHASE; k++){
+                               init_sample_filter(&info->fc[k], HUMANIZER_CUTOFF * humanizer_cf[info->vowel][k], HUMANIZER_RESO, FILTER_BPF12_3);
+                               init_envelope3(&info->env[k], HUMANIZER_CUTOFF * humanizer_cf[info->vowel][k], info->accel * playmode_rate_ms);
                        }
-                       info->flt_level = pow((FLOAT_T)div_phase, 0.666666666);
+                       init_sample_filter(&info->fc2, HUMANIZER_CUTOFF * humanizer_cf2[info->vowel], 0, FILTER_BCF12_3);
+                       init_envelope3(&info->env2, HUMANIZER_CUTOFF * humanizer_cf2[info->vowel], info->accel * playmode_rate_ms);     
                }
                if(info->od_sw)
                        init_drive(&info->drv, 4, 0.2, 0.7, info->drive * otd.efx_CustomHmnLvIn);
                else
-                       init_drive(&info->drv, 0, 0.0, 0.7, 1.0);
-               info->leveld = info->flt_level * HUMANIZER_LEVEL * otd.efx_CustomHmnLvOut;
+                       init_drive(&info->drv, 0, 0.0, 0.7, info->drive * otd.efx_CustomHmnLvIn);
                if(info->p_accel != info->accel){
                        info->p_accel = info->accel;
-                       reset_envelope3(&info->env, 1.0, info->accel * playmode_rate_ms);
+                       for(k = 0; k < HUMANIZER_PHASE; k++)
+                               reset_envelope3(&info->env[k], HUMANIZER_CUTOFF * humanizer_cf[info->vowel][k], info->accel * playmode_rate_ms);
+                       reset_envelope3(&info->env2, HUMANIZER_CUTOFF * humanizer_cf2[info->vowel], info->accel * playmode_rate_ms);
                }
                if(info->p_vowel != info->vowel){
-                       info->p_vowel = info->vowel;
-                       for(i = 0; i < 11; i++)
-                               info->vc[i] = info->dc[i];
-                       init_envelope3(&info->env, 0.0, info->accel * playmode_rate_ms);
-                       reset_envelope3(&info->env, 1.0, ENVELOPE_KEEP);
-                       info->p_ac = -1;
-               }                       
+                       info->p_vowel = info->vowel;                    
+                       for(k = 0; k < HUMANIZER_PHASE; k++)
+                               reset_envelope3(&info->env[k], HUMANIZER_CUTOFF * humanizer_cf[info->vowel][k], ENVELOPE_KEEP);
+                       reset_envelope3(&info->env2, HUMANIZER_CUTOFF * humanizer_cf2[info->vowel], ENVELOPE_KEEP);
+               }       
+               info->inleveld = otd.efx_CustomHmnLvIn;
+               info->leveld = HUMANIZER_LEVEL * otd.efx_CustomHmnLvOut;
+               info->inleveli = TIM_FSCALE(info->inleveld, 24);
+               info->leveli = TIM_FSCALE(info->leveld, 24);            
                return;
        } else if(count == MAGIC_FREE_EFFECT_INFO) {
                info->init = 0;
                return;
-       } else if(count == 0) {
+       } else if(count <= 0) {
                return;
-       }       
+       }
        // vowel interpolation
-       compute_envelope3(&info->env, cnt);
-       if(info->p_ac != info->env.vol){
-               FLOAT_T dclevel = 0.0;
-               int vn = info->vowel;
-               double ac1 = info->p_ac = info->env.vol;
-               double ac2 = 1.0 - ac1;
-               for(i = 0; i < 11; i++){
-                       dc[i] = vowel_coeff[vn][i];
-               //      dc[i] = vowel_coeff[vn][i] * ac1 + vc[i] * ac2;
-                       dclevel += dc[i];
-               }
-               info->fmt_level = (dclevel > 1.0) ? (1.0 / dclevel) : 1.0;
-       }
-       // lfo  
-       for(i = 0; i < HUMANIZER_PHASE; i++){
-               set_sample_filter_freq(&info->fc[i], HUMANIZER_CUTOFF * pow(HUMANIZER_LFO_DEPTH, sin(info->lfo_count + info->lfo_phase[i])));
-               recalc_filter(&info->fc[i]);
-       }
-       if ((info->lfo_count += (info->lfo_rate * cnt)) >= M_PI2) {info->lfo_count -= M_PI2;}
+       for(k = 0; k < HUMANIZER_PHASE; k++){
+               compute_envelope3(&info->env[k], cnt);
+               set_sample_filter_freq(&info->fc[k], info->env[k].vol);
+               recalc_filter(&info->fc[k]);
+       }
        // CH_MONO
-
        for (i = 0; i < count; i++)
        {
                DATA_T tmp = 0, sum = 0;
                tmp = buf[i];
-               do_drive_mono(&info->drv, &tmp);
-               db[0] = tmp;
-       
-#if (USE_X86_EXT_INTRIN >= 3) && defined(FLOAT_T_DOUBLE) && defined(DATA_T_DOUBLE)
-               {
-               __m128d vdc0 = _mm_loadu_pd(&dc[0]), vdc2 = _mm_loadu_pd(&dc[2]), vdc4 = _mm_loadu_pd(&dc[4]),
-                       vdc6 = _mm_loadu_pd(&dc[6]), vdc8 = _mm_loadu_pd(&dc[8]), vdc10 = _mm_loadu_pd(&dc[10]);
-               __m128d vdb0 = _mm_loadu_pd(&db[0]), vdb2 = _mm_loadu_pd(&db[2]), vdb4 = _mm_loadu_pd(&db[4]),
-                       vdb6 = _mm_loadu_pd(&db[6]), vdb8 = _mm_loadu_pd(&db[8]), vdb10 = _mm_loadu_pd(&db[10]);                
-               __m128d vec_tmp = MM_FMA6_PD(vdb0, vdc0, vdb2, vdc2, vdb4, vdc4, vdb6, vdc6, vdb8, vdc8, vdb10, vdc10);
-               __m128d vfmtlv = _mm_load_sd(&info->fmt_level);
-               vec_tmp = _mm_add_pd(vec_tmp, _mm_shuffle_pd(vec_tmp, vec_tmp, 0x1));
-               vec_tmp = _mm_mul_sd(vec_tmp, vfmtlv);
-               _mm_store_sd(&tmp, vec_tmp);
-               vdb0 = _mm_move_sd(vdb0, vec_tmp);
-               _mm_storeu_pd(&db[9], vdb8);
-               _mm_storeu_pd(&db[7], vdb6);
-               _mm_storeu_pd(&db[5], vdb4);
-               _mm_storeu_pd(&db[3], vdb2);
-               _mm_storeu_pd(&db[1], vdb0);
-               }
-               
-#elif (USE_X86_EXT_INTRIN >= 2) && defined(FLOAT_T_FLOAT) && defined(DATA_T_FLOAT)
-               {
-               __m128 vdc0 = _mm_loadu_ps(&dc[0]), vdc4 = _mm_loadu_ps(&dc[4]), vdc8 = _mm_loadu_ps(&dc[8]);
-               __m128 vdb0 = _mm_loadu_ps(&db[0]), vdb4 = _mm_loadu_ps(&db[4]), vdb8 = _mm_loadu_ps(&db[8]);           
-               __m128 vec_tmp = MM_FMA3_PS(vdb0, vdc0, vdb4, vdc4, vdb8, vdc8);
-               __m128 vfmtlv = _mm_set1_ps(info->fmt_level);
-               vec_tmp = _mm_add_ps(vec_tmp, _mm_movehl_ps(vec_tmp, vec_tmp)); // v0=v0+v1 v1=v2+v3
-               vec_tmp = _mm_add_ps(vec_tmp, _mm_shuffle_ps(vec_tmp, vec_tmp, 0xe1)); // v0=v0+v1      
-               vec_tmp = _mm_mul_ss(vec_tmp, vfmtlv);
-               vdb0 = _mm_move_ss(vdb0, vec_tmp);
-               _mm_store_ss(&tmp,  vec_tmp);
-               _mm_storeu_ps(&db[9], vdb8);
-               _mm_storeu_ps(&db[5], vdb4);
-               _mm_storeu_ps(&db[1], vdb0);            
-               }
-
+#if (OPT_MODE == 1) && !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT) /* fixed-point implementation */
+               tmp = imuldiv24(tmp, info->inleveli);
 #else
-               tmp = db[0] * dc[0] + db[1] * dc[1] + db[2] * dc[2] + db[3] * dc[3]
-                       + db[4] * dc[4] + db[5] * dc[5] + db[6] * dc[6] + db[7] * dc[7]
-                       + db[8] * dc[8] + db[9] * dc[9] + db[10] * dc[10];      
-               tmp *= info->fmt_level;
-               db[10] = db[9];
-               db[9] = db[8];
-               db[8] = db[7];
-               db[7] = db[6];
-               db[6] = db[5];
-               db[5] = db[4];
-               db[4] = db[3];
-               db[3] = db[2];
-               db[2] = db[1];                     
-               db[1] = tmp;    
+               tmp *= info->inleveld;
 #endif
+               do_drive_mono(&info->drv, &tmp);
                for(k = 0; k < HUMANIZER_PHASE; k++){
                        DATA_T flt = tmp;
                        sample_filter(&info->fc[k], &flt);
                        sum += flt;
                }
+               sample_filter(&info->fc2, &sum);
+#if (OPT_MODE == 1) && !defined(DATA_T_DOUBLE) && !defined(DATA_T_FLOAT) /* fixed-point implementation */
+               sum = imuldiv24(sum, info->leveli);
+#else
                sum *= info->leveld;
+#endif
                buf[i] = sum;
                i++;
                buf[i] = sum;
        }
 }
-#endif /* OPT_MODE != 0 */
 
 
 ///r
-#define OD_GAIN 1.75
-#define OD_AMP_GAIN 1.3
-#define OD_CLIP_LEVEL 0.75
-#define OD_LEVEL 1.000
+#define OD_GAIN 1.25
+#define OD_AMP_GAIN 1.125
+#define OD_CLIP_LEVEL 0.5
+#define OD_LEVEL 2.0
 #define OD_LEVEL_AMPN (0.50f)
 #define OD_LEVEL_AMP0 (1.00f)
 #define OD_LEVEL_AMP1 (1.00f)
 #define OD_LEVEL_AMP2 (1.00f)
 #define OD_LEVEL_AMP3 (1.00f)
-#define OD_FF1_OD 125.0f
-#define OD_FF1_DS 250.0f
-#define OD_FF2 5000.0f
+#define OD_FF1 125.0f
+#define OD_FF2 2500.0f // DS
 #define OD_FF3 4500.0f
 #define OD_FF4_COEF (1.025)
 #define OD_FF4_AMPN (5999.0f * OD_FF4_COEF)
@@ -12840,15 +12021,15 @@ static void do_od_ds_multi(int32 *buf, int32 count, InfoOverdrive *info)
                /* overdrive distortion */
                if(info->type == 0) { // overdrive
                        init_drive(&info->drv1, 4, 0.8, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
-                       init_drive(&info->drv2, 4, 0.6, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
-                       bw1_freq = OD_FF1_OD;
+                       init_drive(&info->drv2, 0, 0.0, OD_CLIP_LEVEL, 1.0);
+                       bw1_freq = OD_FF1;
                        flt2_type = FILTER_NONE;
                        flt3_type = FILTER_LPF6;
                }else{ // distortion
-                       init_drive(&info->drv1, 9, 0.5, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
-                       init_drive(&info->drv2, 9, 0.2, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
-                       bw1_freq = OD_FF1_DS;
-                       flt2_type = FILTER_LPF_BW;
+                       init_drive(&info->drv1, 4, 0.9, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
+                       init_drive(&info->drv2, 4, 0.7, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
+                       bw1_freq = OD_FF1;
+                       flt2_type = FILTER_LPF24_2;
                        flt3_type = FILTER_LPF6;
                }
                /* waveshaper amp simulator */
@@ -13061,15 +12242,15 @@ static void do_od_ds_multi(DATA_T *buf, int32 count, InfoOverdrive *info)
                /* overdrive distortion */
                if(info->type == 0) { // overdrive
                        init_drive(&info->drv1, 4, 0.8, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
-                       init_drive(&info->drv2, 4, 0.6, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
-                       bw1_freq = OD_FF1_OD;
+                       init_drive(&info->drv2, 0, 0.0, OD_CLIP_LEVEL, 1.0);
+                       bw1_freq = OD_FF1;
                        flt2_type = FILTER_NONE;
                        flt3_type = FILTER_LPF6;
                }else{ // distortion
-                       init_drive(&info->drv1, 9, 0.5, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
-                       init_drive(&info->drv2, 9, 0.2, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
-                       bw1_freq = OD_FF1_DS;
-                       flt2_type = FILTER_LPF_BW;
+                       init_drive(&info->drv1, 4, 0.9, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
+                       init_drive(&info->drv2, 4, 0.7, OD_CLIP_LEVEL, calc_gs_drive_gain(info->drive));
+                       bw1_freq = OD_FF1;
+                       flt2_type = FILTER_LPF24_2;
                        flt3_type = FILTER_LPF6;
                }
                /* waveshaper amp simulator */
@@ -13131,6 +12312,7 @@ static void do_od_ds_multi(DATA_T *buf, int32 count, InfoOverdrive *info)
                init_sample_filter2(bq, OD_FF5 * otd.gsefx_CustomODFreq, 0, OD_FQ5, FILTER_BIQUAD_LOW);
                return;
        }
+
        switch(info->mode){
        case CH_STEREO:
                for(i = 0; i < count; i++) {
@@ -13234,10 +12416,10 @@ static void do_od_ds_parallel(DATA_T *buf, int32 count, InfoOverdrive *info1, In
 
        if(count == MAGIC_INIT_EFFECT_INFO) {
                do_od_ds_multi(buf, count, info1);
-               do_od_ds_multi(buf, count, info2);
+               do_od_ds_multi(buf, count, info2);              
                return;
        }else if(count <= 0)
-               return;
+               return; 
        for(i = 0; i < count; i++) {
                input[0] = buf[i]; input[1] = buf[i + 1];
                /* waveshaping amp simulation anti-aliasing */
@@ -13544,11 +12726,11 @@ static void do_phaser_multi(DATA_T *buf, int32 count, InfoPhaser *info)
 #define AW_LEVEL (1.0)
 #define AW_LPF_LEVEL (2.5 * AW_LEVEL)
 #define AW_BPF_LEVEL (5.0 * AW_LEVEL)
-#define AW_PEAK_TIME (5.0) // ms
-#define AW_SENS_ATTACK_TIME (10.0) // min ms 
-#define AW_SENS_RELEASE_TIME (10.0) // min ms 
-#define AW_MAN_ENV_TIME (10.0) // ms
-#define AW_FLT_ENV_TIME (5.0) // ms
+#define AW_PEAK_TIME (20.0) // ms
+#define AW_SENS_ATTACK_TIME (20.0) // min ms 
+#define AW_SENS_RELEASE_TIME (200.0) // min ms 
+#define AW_MAN_ENV_TIME (100.0) // ms
+#define AW_FLT_ENV_TIME (10.0) // ms
 #define AW_BPF_WIDTH (0.85)
 #define AW_DEPTH_MAX (10.66666666666666666666666666666) // 128 * DIV_12 // from XG AutoWah
 
@@ -13581,6 +12763,7 @@ static void do_auto_wah_multi(DATA_T *buf, int32 count, InfoAutoWah *info)
 {
        int32 i, cnt = count / 2;
        DATA_T tmp, tmpm;
+       FLOAT_T cutoff;
 
        if(count == MAGIC_INIT_EFFECT_INFO) {
                //info->type = 1; // 0:lpf 1:bpf
@@ -13607,7 +12790,7 @@ static void do_auto_wah_multi(DATA_T *buf, int32 count, InfoAutoWah *info)
                                set_sample_filter_type(&info->fc, FILTER_LPF_BW);
                        }
                        info->ptype = info->type;
-                       init_envelope3(&info->man_env, info->manual, AW_SENS_ATTACK_TIME * playmode_rate_ms);
+                       init_envelope3(&info->man_env, info->manual, AW_MAN_ENV_TIME * playmode_rate_ms);
                        set_sample_filter_freq(&info->fc, info->manual);
                        set_sample_filter_reso(&info->fc, info->peak);
                }
@@ -13646,13 +12829,14 @@ static void do_auto_wah_multi(DATA_T *buf, int32 count, InfoAutoWah *info)
        } else if(count == MAGIC_FREE_EFFECT_INFO) {
                info->init = 0;
                return;
-       } else if(count == 0) {
+       } else if(count <= 0) {
                return;
        }
+
        compute_envelope3(&info->man_env, cnt);
-       if(info->sens_sw && info->lfo_sw){
-               FLOAT_T cutoff, sens_out, lfo_out;
-               // peak level
+       cutoff = info->man_env.vol;
+       if(info->sens_sw){ // sens
+               FLOAT_T sens_out;
                if((info->peak_rate += info->peak_count * cnt) >= 1.0){
                        FLOAT_T sens_level = info->peak_level * info->sens_coef;
                        info->peak_rate -= floor(info->peak_rate); // reset count
@@ -13669,55 +12853,24 @@ static void do_auto_wah_multi(DATA_T *buf, int32 count, InfoAutoWah *info)
                        }
                        info->peak_level = 0;
                }
-               // lfo
-               if((info->lfo_rate += info->lfo_freq * cnt) >= 1.0)
-                       info->lfo_rate -= floor(info->lfo_rate); // reset count
-               lfo_out = (info->lfo_rate < 0.5) ? (info->lfo_rate * 2.0) : (info->lfo_rate * -2.0 + 2.0); // triangular // lfo_out 0.0~<1.0    
-               // cutoff
-               cutoff = info->man_env.vol * POW2(info->sens_env.vol * info->sens_mult) * POW2(lfo_out * info->depth_mult);
-               cutoff = clip_FLOAT_T(cutoff, manual_table[0], manual_table[127]);
-               reset_envelope3(&info->flt_env, cutoff, ENVELOPE_KEEP);
-       }else if(info->sens_sw){        
-               FLOAT_T cutoff, sens_out;
-               // peak level
-               if((info->peak_rate += info->peak_count * cnt) >= 1.0){
-                       FLOAT_T sens_level = info->peak_level * info->sens_coef;
-                       info->peak_rate -= floor(info->peak_rate); // reset count
-                       if(sens_level > 1.0)
-                               sens_level = 1.0;
-                       if(sens_level > info->sens_level){ 
-                               info->env_mode = 0; // attck
-                               info->sens_level = sens_level;
-                               reset_envelope3(&info->sens_env, sens_level, info->attack_cnt * (sens_level - info->sens_level));
-                       }else if(sens_level < info->sens_level && (info->env_mode || !check_envelope3(&info->sens_env))){ // after attack time
-                               info->env_mode = 1; // release
-                               info->sens_level = sens_level;
-                               reset_envelope3(&info->sens_env, sens_level, info->release_cnt * (info->sens_level - sens_level));
-                       }
-                       info->peak_level = 0;
-               }
                compute_envelope3(&info->sens_env, cnt);
-               // cutoff
-               cutoff = info->man_env.vol * POW2(info->sens_env.vol * info->sens_mult);
-               cutoff = clip_FLOAT_T(cutoff, manual_table[0], manual_table[127]);
-               reset_envelope3(&info->flt_env, cutoff, ENVELOPE_KEEP);
-       }else if(info->lfo_sw){ 
-               FLOAT_T cutoff, lfo_out;        
-               // lfo  
+               cutoff *= POW2(info->sens_env.vol * info->sens_mult);   
+       }
+       if(info->lfo_sw){       // lfo
+               FLOAT_T lfo_out;
                if((info->lfo_rate += info->lfo_freq * cnt) >= 1.0)
                        info->lfo_rate -= floor(info->lfo_rate); // reset count
                lfo_out = (info->lfo_rate < 0.5) ? (info->lfo_rate * 2.0) : (info->lfo_rate * -2.0 + 2.0); // triangular // lfo_out 0.0~<1.0    
-               // cutoff
-               cutoff = info->man_env.vol * POW2(lfo_out * info->depth_mult);
-               cutoff = clip_FLOAT_T(cutoff, manual_table[0], manual_table[127]);
-               reset_envelope3(&info->flt_env, cutoff, ENVELOPE_KEEP);
-       }else
-               reset_envelope3(&info->flt_env, info->man_env.vol, ENVELOPE_KEEP);
+               cutoff *= POW2(lfo_out * info->depth_mult);
+       }
+       // cutoff
+       cutoff = clip_FLOAT_T(cutoff, manual_table[0], manual_table[127]);
+       reset_envelope3(&info->flt_env, cutoff, ENVELOPE_KEEP);
        compute_envelope3(&info->flt_env, cnt);
        // filter
        set_sample_filter_freq(&info->fc, info->flt_env.vol);
        recalc_filter(&info->fc);
-
+       
        switch(info->mode){
        case CH_STEREO:
                for (i = 0; i < count; i++)
@@ -15920,8 +15073,8 @@ static void do_echo(DATA_T *buf, int32 count, InfoEcho *info)
 
 
 ///r
-#define XG_OD_DRIVE 1.75
-#define XG_OD_CLIP_LEVEL 0.75
+#define XG_OD_DRIVE 1.25
+#define XG_OD_CLIP_LEVEL 0.5
 #define XG_OD_PDL0_GAIN 1.00 // overdrive
 #define XG_OD_PDL1_GAIN 1.00 // distortion
 #define XG_OD_PDL2_GAIN 1.00 // overdrive2
@@ -15947,9 +15100,9 @@ static void do_echo(DATA_T *buf, int32 count, InfoEcho *info)
 #define XG_OD_AMP4_LEVEL 1.00
 #define XG_OD_AMP5_LEVEL 1.00
 #define XG_OD_AMP6_LEVEL 1.00
-#define XG_OD_LEVEL 1.0
-#define XG_OD_FF1 150.0f
-#define XG_OD_FF2 5500.0f
+#define XG_OD_LEVEL 2.0
+#define XG_OD_FF1 125.0f
+#define XG_OD_FF2 2500.0f
 #define XG_OD_FF3 4800.0f
 #define XG_OD_FF4_COEF (1.025)
 #define XG_OD_FF4_TYPE0 (5999.0f * XG_OD_FF4_COEF) // off
@@ -15985,8 +15138,8 @@ static void do_stereo_od(int32 *buf, int32 count, InfoStereoOD *info)
                switch (info->od_type) {
                default:
                case 0: // overdrive
-                       init_drive(&info->drv1, 4, 0.8, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL0_GAIN);
-                       init_drive(&info->drv2, 4, 0.6, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL0_GAIN);
+                       init_drive(&info->drv1, 4, 0.7, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL0_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 1: // distortion
@@ -15995,8 +15148,8 @@ static void do_stereo_od(int32 *buf, int32 count, InfoStereoOD *info)
                        bw2_type = FILTER_LPF_BW;
                        break;
                case 2: // overdrive2
-                       init_drive(&info->drv1, 4, 0.7, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL2_GAIN);
-                       init_drive(&info->drv2, 4, 0.7, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL2_GAIN);
+                       init_drive(&info->drv1, 4, 0.8, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL2_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 3: // distortion2
@@ -16006,7 +15159,7 @@ static void do_stereo_od(int32 *buf, int32 count, InfoStereoOD *info)
                        break;
                case 4: // solid od
                        init_drive(&info->drv1, 4, 0.5, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL4_GAIN);
-                       init_drive(&info->drv2, 4, 0.5, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL4_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 5: // solid ds
@@ -16016,7 +15169,7 @@ static void do_stereo_od(int32 *buf, int32 count, InfoStereoOD *info)
                        break;
                case 6: // tube od
                        init_drive(&info->drv1, 4, 1.0, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL6_GAIN);
-                       init_drive(&info->drv2, 4, 0.6, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL6_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 7: // tube ds
@@ -16026,7 +15179,7 @@ static void do_stereo_od(int32 *buf, int32 count, InfoStereoOD *info)
                        break;
                case 8: // vintage
                        init_drive(&info->drv1, 4, 0.3, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL8_GAIN);
-                       init_drive(&info->drv2, 4, 0.3, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL8_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 9: // fuzz
@@ -16202,7 +15355,7 @@ static void do_stereo_od(DATA_T *buf, int32 count, InfoStereoOD *info)
                default:
                case 0: // overdrive
                        init_drive(&info->drv1, 4, 0.8, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL0_GAIN);
-                       init_drive(&info->drv2, 4, 0.6, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL0_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 1: // distortion
@@ -16212,7 +15365,7 @@ static void do_stereo_od(DATA_T *buf, int32 count, InfoStereoOD *info)
                        break;
                case 2: // overdrive2
                        init_drive(&info->drv1, 4, 0.7, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL2_GAIN);
-                       init_drive(&info->drv2, 4, 0.7, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL2_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 3: // distortion2
@@ -16222,7 +15375,7 @@ static void do_stereo_od(DATA_T *buf, int32 count, InfoStereoOD *info)
                        break;
                case 4: // solid od
                        init_drive(&info->drv1, 4, 0.5, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL4_GAIN);
-                       init_drive(&info->drv2, 4, 0.5, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL4_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 5: // solid ds
@@ -16232,7 +15385,7 @@ static void do_stereo_od(DATA_T *buf, int32 count, InfoStereoOD *info)
                        break;
                case 6: // tube od
                        init_drive(&info->drv1, 4, 1.0, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL6_GAIN);
-                       init_drive(&info->drv2, 4, 0.6, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL6_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 7: // tube ds
@@ -16242,7 +15395,7 @@ static void do_stereo_od(DATA_T *buf, int32 count, InfoStereoOD *info)
                        break;
                case 8: // vintage
                        init_drive(&info->drv1, 4, 0.3, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL8_GAIN);
-                       init_drive(&info->drv2, 4, 0.3, XG_OD_CLIP_LEVEL, calc_xg_drive_gain(info->drive) * XG_OD_PDL8_GAIN);
+                       init_drive(&info->drv2, 0, 0.0, XG_OD_CLIP_LEVEL, 1.0);
                        bw2_type = FILTER_NONE;
                        break;
                case 9: // fuzz
@@ -16494,15 +15647,15 @@ static void do_lofi(DATA_T *buf, int32 count, InfoLoFi *info)
        if(count == MAGIC_INIT_EFFECT_INFO) {
                // sample rate
                if(info->sr_rate < play_mode->rate)
-                       init_sample_filter2(sr_fil, info->sr_rate * DIV_2, 0.0, 0.0, FILTER_LPF24_2x2);
+                       init_sample_filter(sr_fil, info->sr_rate * DIV_2, 0.0, FILTER_LPF24_2x2);
                else
-                       init_sample_filter2(sr_fil, 0, 0, 0, FILTER_NONE);      
+                       init_sample_filter(sr_fil, 0, 0, FILTER_NONE);  
                // pre filter
-               init_sample_filter2(pre_fil, info->pre_fil_freq, info->pre_fil_reso, 0.0, info->pre_fil_type);
+               init_sample_filter(pre_fil, info->pre_fil_freq, info->pre_fil_reso, info->pre_fil_type);
                // post filter
-               init_sample_filter2(post_fil, info->post_fil_freq, info->post_fil_reso, 0.0, info->post_fil_type);
-               info->level_up = pow(2.0, INS_LOFI_BIT - info->bit_length);
-               info->level_down = 1.0 / info->level_up;
+               init_sample_filter(post_fil, info->post_fil_freq, info->post_fil_reso, info->post_fil_type);
+               info->level_down = pow(2.0, -INS_LOFI_BIT + info->bit_length);
+               info->level_up = 1.0 / info->level_down;
                if(info->nz_gen){
                        if(info->wp_sel){
                                init_pink_noise(pnzl);
@@ -21166,35 +20319,6 @@ static void free_effect_sd(struct mfx_effect_sd_t *st)
        st->ef = NULL;  
 }
 
-void free_effect_buffers(void)
-{
-       int i;
-       /* free GM/GS/GM2 effects */
-       do_ch_standard_reverb(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
-       do_ch_freeverb(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
-       do_ch_reverb_ex(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
-       do_ch_reverb_ex2(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
-       do_ch_plate_reverb(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
-       do_ch_reverb_normal_delay(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
-       do_ch_chorus_free(NULL, MAGIC_FREE_EFFECT_INFO, &(chorus_status_gs.info_stereo_chorus));
-       do_ch_3tap_delay(NULL, MAGIC_FREE_EFFECT_INFO, &(delay_status_gs.info_delay));
-       free_effect_gs(&insertion_effect_gs);
-       /* free XG effects */
-       free_effect_xg(&reverb_status_xg);
-       free_effect_xg(&chorus_status_xg);
-       for (i = 0; i < XG_VARIATION_EFFECT_NUM; i++) {
-               free_effect_xg(&variation_effect_xg[i]);
-       }
-       for (i = 0; i < XG_INSERTION_EFFECT_NUM; i++) {
-               free_effect_xg(&insertion_effect_xg[i]);
-       }
-       free_effect_sd(&reverb_status_sd);
-       free_effect_sd(&chorus_status_sd);
-       for (i = 0; i < SD_MFX_EFFECT_NUM; i++) {
-               free_effect_sd(&mfx_effect_sd[i]);
-       }               
-}
-
 
 
 /********************************** GS EFFECT conv **********************************/
@@ -21362,8 +20486,8 @@ static void do_gs_overdrive(DATA_T *buf, int32 count, EffectList *ef)
 {
        Info_GS_Overdrive *info = (Info_GS_Overdrive *)ef->info;
        InfoOverdrive *od = &info->od;
-       
-       effect_mix_mono(buf, count);
+
+       effect_mix_mono(buf, count);    
        do_od_ds_multi(buf, count, od);
        effect_panning_mono_stereo(buf, count, info->panl, info->panr);
        effect_level_stereo(buf, count, info->level);
@@ -22420,10 +21544,103 @@ static void conv_gs_lofi1(struct insertion_effect_gs_t *st, EffectList *ef)
        lf->level_in = otd.gsefx_CustomLFLvIn;
        lf->level_out = otd.gsefx_CustomLFLvOut;
        lf->sr_rate = 32000;
-       lf->pre_fil_type = st->parameter[0] ? FILTER_LPF_BW : FILTER_NONE; // off,lpf
-       lf->pre_fil_freq = 16000.0 / (FLOAT_T)(clip_int(st->parameter[0], 0, 5) + 1);
-       lf->pre_fil_reso = 0.0;
-       lf->bit_length = 12 - clip_int(st->parameter[1], 0, 8); 
+       switch(st->parameter[0]){
+       case 0:
+       default:
+               lf->pre_fil_type = FILTER_NONE;
+               lf->pre_fil_freq = 16000.0;
+               lf->pre_fil_reso = 0.0;
+               break;
+       case 1:
+               lf->pre_fil_type = FILTER_LPF12_2;
+               lf->pre_fil_freq = 3500.0;
+               lf->pre_fil_reso = 3.0;
+               break;
+       case 2:
+               lf->pre_fil_type = FILTER_LPF12_2;
+               lf->pre_fil_freq = 2500.0;
+               lf->pre_fil_reso = 0.0;
+               break;
+       case 3:
+               lf->pre_fil_type = FILTER_LPF12_2;
+               lf->pre_fil_freq = 3500.0;
+               lf->pre_fil_reso = 6.0;
+               break;
+       case 4:
+               lf->pre_fil_type = FILTER_LPF12_2;
+               lf->pre_fil_freq = 2500.0;
+               lf->pre_fil_reso = 6.0;
+               break;
+       case 5:
+               lf->pre_fil_type = FILTER_LPF12_2;
+               lf->pre_fil_freq = 3500.0;
+               lf->pre_fil_reso = 12.0;
+               break;
+
+       }
+       switch(st->parameter[1]){
+       case 0:
+       default:
+               lf->bit_length = 10;
+               break;
+       case 1:
+               lf->bit_length = 8;
+               break;
+       case 2:
+               lf->bit_length = 4;
+               break;
+       case 3:
+               lf->bit_length = 9;
+               break;
+       case 4:
+               lf->bit_length = 7;
+               break;
+       case 5:
+               lf->bit_length = 11;
+               break;
+       case 6:
+               lf->bit_length = 9;
+               break;
+       case 7:
+               lf->bit_length = 6;
+               break;
+       case 8:
+               lf->bit_length = 4;
+               break;
+       }       
+       switch(st->parameter[2]){ // choose the post filter type/frequency/resonance from parameter[2]
+       case 0:
+       default:
+               lf->post_fil_type = FILTER_NONE;
+               lf->post_fil_freq = 10.0;
+               lf->post_fil_reso = 0.0;
+               break;
+       case 1:
+               lf->post_fil_type = FILTER_HPF12_2;
+               lf->post_fil_freq = 100.0;
+               lf->post_fil_reso = 0.0;
+               break;
+       case 2:
+               lf->post_fil_type = FILTER_HPF12_2;
+               lf->post_fil_freq = 400.0;
+               lf->post_fil_reso = 0.0;
+               break;
+       case 3:
+               lf->post_fil_type = FILTER_HPF12_2;
+               lf->post_fil_freq = 400.0;
+               lf->post_fil_reso = 6.0;
+               break;
+       case 4:
+               lf->post_fil_type = FILTER_HPF12_2;
+               lf->post_fil_freq = 1200.0;
+               lf->post_fil_reso = 0.0;
+               break;
+       case 5:
+               lf->post_fil_type = FILTER_HPF12_2;
+               lf->post_fil_freq = 1200.0;
+               lf->post_fil_reso = 6.0;
+               break;
+       } // NOTE(review): the three unchanged statements right after this switch assign post_fil_type/post_fil_freq/post_fil_reso again, overriding every value chosen here - confirm whether they were meant to be removed
        lf->post_fil_type = st->parameter[2] ? FILTER_LPF_BW : FILTER_NONE; // off,lpf
        lf->post_fil_freq = 16000.0 / (FLOAT_T)(clip_int(st->parameter[2], 0, 5) + 1);
        lf->post_fil_reso = 0.0;                
@@ -25886,9 +25103,9 @@ static void conv_xg_comp_distortion(struct effect_xg_t *st, EffectList *ef)
        cmp1->sustain = cmp2->sustain = compressor_release_time_table_xg[clip_int(st->param_lsb[12], 0, 15)]; // 10ms-500ms
        cmp1->pre_gain = cmp2->pre_gain = (double)(127 - clip_int(st->param_lsb[13], 79, 121)); // dB
        cmp1->post_gain = cmp2->post_gain = 0.0; // dB
-       cmp1->threshold = cmp2->threshold = 1.0; // threshold(~1.0)
+       cmp1->threshold = cmp2->threshold = 0.944; // threshold(~1.0)
        cmp1->slope = cmp2->slope = 1.0 / compressor_ratio_table_xg[clip_int(st->param_lsb[14], 0, 7)]; // ratio?
-       cmp1->div_level_0db = cmp2->div_level_0db = div_ins_level * DIV_2;
+       cmp1->div_level_0db = cmp2->div_level_0db = div_ins_level * DIV_8;
 }
 
 static void do_xg_comp_distortion(DATA_T *buf, int32 count, EffectList *ef)
@@ -26638,9 +25855,9 @@ static void conv_xg_compressor(struct effect_xg_t *st, EffectList *ef)
        cmp->sustain = compressor_release_time_table_xg[clip_int(st->param_lsb[1], 0, 15)]; // 10ms-500ms
        cmp->pre_gain = (double)(127 - clip_int(st->param_lsb[2], 79, 121)); // dB
        cmp->post_gain = 0.0; // dB
-       cmp->threshold = 1.0; // threshold(~1.0)
+       cmp->threshold = 0.944; // threshold(~1.0)
        cmp->slope = 1.0 / compressor_ratio_table_xg[clip_int(st->param_lsb[3], 0, 7)]; // ratio?
-       cmp->div_level_0db = div_ins_level * DIV_2;
+       cmp->div_level_0db = div_ins_level * DIV_8;
        info->level = calc_xg_level(st->param_lsb[4], st);
 }
 
@@ -27139,9 +26356,9 @@ static void conv_xg_comp_ds_delay(struct effect_xg_t *st, EffectList *ef)
        cmp->sustain = compressor_release_time_table_xg[clip_int(st->param_lsb[11], 0, 15)]; // 10ms-500ms
        cmp->pre_gain = (double)(127 - clip_int(st->param_lsb[12], 79, 121)); // dB
        cmp->post_gain = 0.0; // dB
-       cmp->threshold = 1.0; // threshold(~1.0)
+       cmp->threshold = 0.944; // threshold(~1.0)
        cmp->slope = 1.0 / compressor_ratio_table_xg[clip_int(st->param_lsb[13], 0, 7)]; // ratio?
-       cmp->div_level_0db = div_ins_level * DIV_2;
+       cmp->div_level_0db = div_ins_level * DIV_8;
 }
 
 static void do_xg_comp_ds_delay(DATA_T *buf, int32 count, EffectList *ef)
@@ -27205,9 +26422,9 @@ static void conv_xg_comp_od_delay(struct effect_xg_t *st, EffectList *ef)
        cmp->sustain = compressor_release_time_table_xg[clip_int(st->param_lsb[11], 0, 15)]; // 10ms-500ms
        cmp->pre_gain = (double)(127 - clip_int(st->param_lsb[12], 79, 121)); // dB
        cmp->post_gain = 0.0; // dB
-       cmp->threshold = 1.0; // threshold(~1.0)
+       cmp->threshold = 0.944; // threshold(~1.0)
        cmp->slope = 1.0 / compressor_ratio_table_xg[clip_int(st->param_lsb[13], 0, 7)]; // ratio?
-       cmp->div_level_0db = div_ins_level * DIV_2;
+       cmp->div_level_0db = div_ins_level * DIV_8;
 }
 
 static void do_xg_comp_od_delay(DATA_T *buf, int32 count, EffectList *ef)
@@ -27707,9 +26924,9 @@ static void conv_xg_comp_ds_tempo_delay(struct effect_xg_t *st, EffectList *ef)
        cmp->sustain = compressor_release_time_table_xg[clip_int(st->param_lsb[11], 0, 15)]; // 10ms-500ms
        cmp->pre_gain = (double)(127 - clip_int(st->param_lsb[12], 79, 121)); // dB
        cmp->post_gain = 0.0;
-       cmp->threshold = 1.0; // threshold(~1.0)
+       cmp->threshold = 0.944; // threshold(~1.0)
        cmp->slope = 1.0 / compressor_ratio_table_xg[clip_int(st->param_lsb[13], 0, 7)]; // ratio?
-       cmp->div_level_0db = div_ins_level * DIV_2;
+       cmp->div_level_0db = div_ins_level * DIV_8;
 }
 
 static void do_xg_comp_ds_tempo_delay(DATA_T *buf, int32 count, EffectList *ef)
@@ -27773,9 +26990,9 @@ static void conv_xg_comp_od_tempo_delay(struct effect_xg_t *st, EffectList *ef)
        cmp->sustain = compressor_release_time_table_xg[clip_int(st->param_lsb[11], 0, 15)]; // 10ms-500ms
        cmp->pre_gain = (double)(127 - clip_int(st->param_lsb[12], 79, 121)); // dB
        cmp->post_gain = 0.0; // dB
-       cmp->threshold = 1.0 ; // threshold(~1.0)
+       cmp->threshold = 0.944; // threshold(~1.0)
        cmp->slope = 1.0 / compressor_ratio_table_xg[clip_int(st->param_lsb[13], 0, 7)]; // ratio?
-       cmp->div_level_0db = div_ins_level * DIV_2;
+       cmp->div_level_0db = div_ins_level * DIV_8;
 }
 
 static void do_xg_comp_od_tempo_delay(DATA_T *buf, int32 count, EffectList *ef)
@@ -35648,11 +34865,94 @@ void test_fft(DATA_T *buf, int32 count)
 #endif
 
 
-
 #ifdef TEST_FIR_EQ
 FIR_EQ test_fir_eq;
 #endif
 
+
+
+#ifdef _DEBUG
+//#define TEST_FX
+#endif
+
+#ifdef TEST_FX
+
+#define TEST_FX_PHASE 8
+#define TEST_FX_RATE1 0.5
+#define TEST_FX_CUTOFF 1000
+#define TEST_FX_RESO 48
+
+typedef struct { // state of the experimental TEST_FX filter bank
+       int count; // selected row of test_fx_cf; indexed by init_fx() and test_fx()
+       double lfo_count, lfo_rate, lfo_phase[TEST_FX_PHASE];
+       FilterCoefficients fc[TEST_FX_PHASE]; // one filter per band
+       Envelope3 env[TEST_FX_PHASE]; // per-band envelope whose .vol is fed to set_sample_filter_freq()
+} InfoTestFx;
+InfoTestFx info_fx; // file-scope object, zero-initialized, so count starts at row 0
+
+FLOAT_T test_fx_cf[5][TEST_FX_PHASE] = { // per-band multipliers applied to TEST_FX_CUTOFF; the first index is info->count
+0.75, 0.87, 0.94, 0.97, 1.04, 1.12, 1.35, 1.54, // a
+0.99, 1.42, 1.54, 1.67, 1.72, 1.76, 1.79, 1.84, // i
+0.61, 0.63, 0.67, 0.74, 0.76, 0.80, 0.83, 0.85, // u
+0.43, 0.44, 0.48, 0.49, 0.81, 0.93, 0.98, 1.09, // e
+0.43, 0.44, 0.48, 0.49, 0.61, 0.77, 0.88, 0.99, // o
+};
+
+void free_fx(void) // currently a no-op; free_effect_buffers() calls it when TEST_FX is defined
+{
+
+}
+extern int32 test_var[10];
+
+void init_fx(void) // set up the TEST_FX bank: one band-pass filter and one envelope per band
+{
+       InfoTestFx *info = &info_fx;    
+       int k;
+       
+       info->lfo_rate = TEST_FX_RATE1 * div_playmode_rate * M_PI2; // increment added to lfo_count per processed step in test_fx()
+       info->lfo_count = 0; // min point
+       for(k = 0; k < TEST_FX_PHASE; k++){
+               init_sample_filter(&info->fc[k], TEST_FX_CUTOFF * test_fx_cf[info->count][k], TEST_FX_RESO, FILTER_BPF12_3); // band k: frequency = base cutoff times the table multiplier
+               init_envelope3(&info->env[k], TEST_FX_CUTOFF * test_fx_cf[info->count][k], 200 * playmode_rate_ms); // same start value as the filter; the last argument is presumably a 200 ms time constant - confirm
+       }       
+}
+       
+void test_fx(DATA_T *buf, int32 count) // experimental effect: sums each pair of entries, runs the sum through every band filter, writes the total back to both entries
+{
+       InfoTestFx *info = &info_fx;
+       int32 i;
+       int k;
+       int32 cnt = count >> 1; // half of count; step size for the LFO counter and the envelopes
+       
+       if ((info->lfo_count += (info->lfo_rate * cnt)) >= 1.0) { // counter wrapped: pick a new table row
+               info->lfo_count = 0;
+               if ((++info->count) >= 5) info->count = 0; // sequential step through the 5 rows ...
+               info->count = rand() % 5; // ... immediately replaced by a random row, so the step above has no lasting effect
+               for(k = 0; k < TEST_FX_PHASE; k++){
+                       reset_envelope3(&info->env[k], TEST_FX_CUTOFF * test_fx_cf[info->count][k], ENVELOPE_KEEP); // retarget each band's envelope to the new row
+               }
+       }       
+       for(k = 0; k < TEST_FX_PHASE; k++){ // advance every envelope and push its value into the matching filter
+               compute_envelope3(&info->env[k], cnt);
+               set_sample_filter_freq(&info->fc[k], info->env[k].vol);
+               recalc_filter(&info->fc[k]);
+       }
+       for (i = 0; i < count; i++) // two entries are consumed per iteration (see the extra i++ below)
+       {
+               DATA_T tmp = 0, sum = 0;
+               tmp = buf[i] + buf[i + 1]; // NOTE(review): reads buf[i + 1]; presumably count is always even - confirm against the caller
+               for(k = 0; k < TEST_FX_PHASE; k++){
+                       DATA_T flt = tmp; // each band filters its own copy of the pair sum
+                       sample_filter(&info->fc[k], &flt);
+                       sum += flt;
+               }
+               buf[i] = sum;
+               i++;
+               buf[i] = sum;
+       }
+}
+#endif
+
 void do_effect(DATA_T *buf, int32 count)
 {
        int32 i;
@@ -35669,6 +34969,9 @@ void do_effect(DATA_T *buf, int32 count)
 #ifdef TEST_FIR_EQ
        apply_fir_eq(&test_fir_eq, buf, nsamples);
 #endif 
+#ifdef TEST_FX
+       test_fx(buf, nsamples);
+#endif
        mix_compressor(buf, nsamples); // elion add.
        /* reverb in mono */
        if (opt_reverb_control && mono)
@@ -35711,6 +35014,41 @@ void do_effect(DATA_T *buf, int32 count)
 }
 
 
+/************************************ free_effect ***************************************/
+
+
+void free_effect_buffers(void) // asks every effect unit to release its buffers
+{
+       int i;
+       /* free GM/GS/GM2 effects */
+       do_ch_standard_reverb(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs)); // the count argument carries the MAGIC_FREE_EFFECT_INFO request instead of a sample count
+       do_ch_freeverb(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
+       do_ch_reverb_ex(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
+       do_ch_reverb_ex2(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
+       do_ch_plate_reverb(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
+       do_ch_reverb_normal_delay(NULL, MAGIC_FREE_EFFECT_INFO, &(reverb_status_gs));
+       do_ch_chorus_free(NULL, MAGIC_FREE_EFFECT_INFO, &(chorus_status_gs.info_stereo_chorus));
+       do_ch_3tap_delay(NULL, MAGIC_FREE_EFFECT_INFO, &(delay_status_gs.info_delay));
+       free_effect_gs(&insertion_effect_gs);
+       /* free XG effects */
+       free_effect_xg(&reverb_status_xg);
+       free_effect_xg(&chorus_status_xg);
+       for (i = 0; i < XG_VARIATION_EFFECT_NUM; i++) {
+               free_effect_xg(&variation_effect_xg[i]);
+       }
+       for (i = 0; i < XG_INSERTION_EFFECT_NUM; i++) {
+               free_effect_xg(&insertion_effect_xg[i]);
+       }
+       free_effect_sd(&reverb_status_sd); // SD effects: reverb, chorus, then each MFX slot
+       free_effect_sd(&chorus_status_sd);
+       for (i = 0; i < SD_MFX_EFFECT_NUM; i++) {
+               free_effect_sd(&mfx_effect_sd[i]);
+       }       
+#ifdef TEST_FX
+       free_fx(); // experimental test effect, only compiled when TEST_FX is defined
+#endif // TEST_FX
+}
+
 
 /************************************ inialize_effect ***************************************/
 
@@ -35744,6 +35082,9 @@ void init_effect(void)
 #ifdef TEST_FIR_EQ
        init_fir_eq(&test_fir_eq);
 #endif
+#ifdef TEST_FX
+       init_fx();
+#endif
        init_mtrand();
        init_pink_noise(&global_pink_noise_light);
        init_ns_tap();
index 09a499b..1dcce94 100644 (file)
@@ -531,7 +531,7 @@ out: 0.0 ~ 8.0 * clip_level
 */
 #define DRIVE_SCALE_BIT (3) // 1.0 * 2^MATH_SCALE_BIT
 #define DRIVE_SCALE_MAX (1 << DRIVE_SCALE_BIT) // table max 1.0 * MATH_SCALE_MAX
-#define DRIVE_BASE_BIT (9) // 0.0~1.0 table size
+#define DRIVE_BASE_BIT (6) // 0.0~1.0 table size
 #define DRIVE_BASE_LENGTH (1 << (DRIVE_BASE_BIT)) // 0.0~1.0:table size
 #define DRIVE_TABLE_LENGTH (1 << (DRIVE_BASE_BIT + DRIVE_SCALE_BIT)) // 0.0~1.0 * MATH_SCALE_MAX table size
 #define DRIVE_FRAC_BIT (14) // for int32
@@ -654,7 +654,7 @@ typedef struct _InfoFreeverb{
                size_rv[FREEVERV_RV][2], index_rv[FREEVERV_RV][2];
        DATA_T hist[2], *buf1[FREEVERV_DELAY1], *buf_ap[FREEVERV_AP][2], *buf_rv[FREEVERV_RV][2], fb_rv[FREEVERV_RV][2];        
        FilterCoefficients lpf1;
-       void (*do_reverb_mode)(DATA_T *buf, int32 count, struct _InfoFreeverb *info);   
+       void (*do_reverb_mode)(DATA_T *buf, int32 count, struct _InfoFreeverb *info);
 } InfoFreeverb;
 
 
@@ -695,11 +695,36 @@ typedef struct _InfoReverbEX{
                , mdelay[REV_EX_UNITS][REV_EX_DELAY], mdepth[REV_EX_UNITS][REV_EX_DELAY];
        FLOAT_T acount[REV_EX_AP_MAX][REV_EX_DELAY], arate[REV_EX_AP_MAX][REV_EX_DELAY], aphase[REV_EX_AP_MAX][REV_EX_DELAY]
                , adelay[REV_EX_AP_MAX][REV_EX_DELAY], adepth[REV_EX_AP_MAX][REV_EX_DELAY];
+       // thread
+       int8 thread;
+       int32 tcount;
+       DATA_T *tibuf; // in
+       DATA_T *tobuf; // out
+       int32 index2t[REV_EX_DELAY2];
 } InfoReverbEX;
 
-//#define REV_EX2
+
+#define REV_EX2
 //#define InfoReverbEX2 InfoReverbEX
 
+typedef struct _InfoReverbEX2{
+       int8 mode, flt_type;
+       double rev_dly_ms, rev_time_sec, rev_width, rev_damp, rev_level, rev_feedback, rev_wet; 
+       double height, width, depth, rev_damp_freq, rev_damp_type, rev_damp_bal, density;
+       double er_time_ms, er_level, level, er_damp_freq, er_roomsize;
+       FLOAT_T levelrv, leveler, feedback, flt_dry, flt_wet, rv_feedback[REV_EX_UNITS], st_sprd, in_level, levelap;
+       int32 levelrvi, leveleri, feedbacki, flt_dryi, flt_weti, rv_feedbacki[REV_EX_UNITS], st_sprdi, in_leveli, levelapi;
+       // IR rev
+       int8 init;
+       int32 frame, srate, wcount, wcycle, rcycle;
+       float *irdata[2], *buf[2];
+       // thread
+       int8 thread;
+       int32 tcount, twcount[4]; // max 4thread
+       float *tbuf[4]; // in*2,out*2
+       float *buf2[2];
+} InfoReverbEX2;
+
 
 /*! 3-Tap Stereo Delay Effect */
 typedef struct _InfoDelay3 {
@@ -876,19 +901,43 @@ typedef struct {
        FilterCoefficients fc, hsh;
 } InfoEnhancer;
 
-#define HUMANIZER_PHASE 4
+#define HUMANIZER_PHASE 10
 typedef struct {
        int8 init, mode, od_sw, vowel, p_vowel;
        double drive, accel;
-       double leveld, p_accel, p_ac, flt_level, fmt_level;
-       double lfo_count, lfo_rate, lfo_phase[HUMANIZER_PHASE];
-       FLOAT_T db[17], dc[16], vc[11];
-       int32 leveli, acceli, ib[11], iv[11];
+       double leveld, p_accel, p_ac, inleveld;
+       int32 leveli, acceli, inleveli;
        Drive drv;
-       Envelope3 env;
-       FilterCoefficients fc[HUMANIZER_PHASE];
+       Envelope3 env[HUMANIZER_PHASE], env2;
+       FilterCoefficients fc[HUMANIZER_PHASE], fc2;
 } InfoHumanizer;
 
+typedef struct InfoDistortion_t{
+       int8 mode, od_sw, od_type, bass_od;
+       FLOAT_T level, drive, tone, edge;
+       FLOAT_T drived, leveld, curve1, curve2;
+       FilterCoefficients od_fc1, od_fc2, od_fc3;
+       void (*do_od1)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
+       void (*do_od2)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
+} InfoDistortion;
+
+typedef struct {
+       int8 init, mode, amp_sw, cab_sw, amp_type, cab_type;
+       FLOAT_T level, gain1, gain2, tone, bright, mic_pos, mic_level, mic_direct, ch_delay;
+       FLOAT_T eq1_gain, eq2_gain, eq3_gain, eq4_gain;
+       FLOAT_T leveld, gain1d, gain2d, curve1, curve2, curve3;
+       void (*do_amp1)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
+       void (*do_amp2)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
+       void (*do_amp3)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
+       FilterCoefficients amp_fc1, amp_fc2, amp_fc3, amp_fc4;
+       FilterCoefficients eq_fc1, eq_fc2, eq_fc3, eq_fc4;
+       FilterCoefficients cab_fc1, cab_fc2, cab_fc3;
+       int8 speaker_num, stack_num;
+       FLOAT_T mwet, mdry, flevel, blevel;
+       DATA_T *ptr1, *ptr2, *ptr3, *ptr4;
+       int32 index, offset1, offset2[3], offset3[2];
+} InfoAmpSimulator;
+
 /*! Overdrive 1 / Distortion 1 */
 typedef struct {
        double level, leveld;
@@ -896,6 +945,7 @@ typedef struct {
        int8 drive, amp_sw, amp_type, type, mode;
        Drive drv1, drv2, drv3;
        FilterCoefficients bw1, bw2, bw3, bw4, bq;
+       InfoAmpSimulator amp;
 } InfoOverdrive;
 
 #define PHASER_PHASE 12
@@ -1053,33 +1103,6 @@ typedef struct {
 } InfoAmbience;
 
 
-typedef struct InfoDistortion_t{
-       int8 mode, od_sw, od_type, bass_od;
-       FLOAT_T level, drive, tone, edge;
-       FLOAT_T drived, leveld, curve1, curve2;
-       FilterCoefficients od_fc1, od_fc2, od_fc3;
-       void (*do_od1)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
-       void (*do_od2)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
-} InfoDistortion;
-
-typedef struct {
-       int8 init, mode, amp_sw, cab_sw, amp_type, cab_type;
-       FLOAT_T level, gain1, gain2, tone, bright, mic_pos, mic_level, mic_direct, ch_delay;
-       FLOAT_T eq1_gain, eq2_gain, eq3_gain, eq4_gain;
-       FLOAT_T leveld, gain1d, gain2d, curve1, curve2, curve3;
-       void (*do_amp1)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
-       void (*do_amp2)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
-       void (*do_amp3)(DATA_T *buf, FLOAT_T gain, FLOAT_T curve);
-       FilterCoefficients amp_fc1, amp_fc2, amp_fc3, amp_fc4;
-       FilterCoefficients eq_fc1, eq_fc2, eq_fc3, eq_fc4;
-       FilterCoefficients cab_fc1, cab_fc2, cab_fc3;
-       int8 speaker_num, stack_num;
-       FLOAT_T mwet, mdry, flevel, blevel;
-       DATA_T *ptr1, *ptr2, *ptr3, *ptr4;
-       int32 index, offset1, offset2[3], offset3[2];
-} InfoAmpSimulator;
-
-
 
 /******************************** GS EFFECT ********************************/
 
index 1a49a88..4453832 100644 (file)
@@ -362,6 +362,21 @@ static inline void recalc_filter_LPF12_2(FilterCoefficients *fc)
        }
 }
 
+static inline void buffer_filter_LPF12_2(FILTER_T *dc, FILTER_T *db, DATA_T *sp, int32 count) // filters sp[0..count) in place using coefficients dc[0..1] and state db[0..1]
+{
+       int32 i;
+       FILTER_T db0 = db[0], db1 = db[1], dc0 = dc[0], dc1 = dc[1]; // work on local copies of state and coefficients inside the loop
+
+       for (i = 0; i < count; i++) {
+               db1 += imuldiv28(((sp[i] << 4) - db0), dc1); // input is shifted up by 4 bits before the difference to the current output is scaled by dc1
+               db0 += db1;
+               db1 = imuldiv28(db1, dc0);
+               sp[i] = db0 >> 4; /* 4.28 to 8.24 */
+       }
+       db[0] = db0; // write the state back once, after the whole buffer
+       db[1] = db1;
+}
+
 static inline void sample_filter_LPF24_2(FILTER_T *dc, FILTER_T *db, DATA_T *sp)
 {
        db[0] = *sp << 4;
@@ -1873,6 +1888,7 @@ static inline void recalc_filter_LPF12_2(FilterCoefficients *fc)
 {
        FILTER_T *dc = fc->dc;
        FLOAT_T f, q ,p, r;
+       FLOAT_T c0, c1, a0, b1, b2;
 
 // Resonant IIR lowpass (12dB/oct) Olli Niemitalo //r
        if(FLT_FREQ_MARGIN || FLT_RESO_MARGIN){
@@ -1881,11 +1897,70 @@ static inline void recalc_filter_LPF12_2(FilterCoefficients *fc)
                CALC_RESO_MARGIN
                f = M_PI2 * fc->freq * fc->div_flt_rate;
                q = 1.0 - f / (2.0 * (RESO_DB_CF_P(fc->reso_dB) + 0.5 / (1.0 + f)) + f - 2.0);
-               dc[0] = q * q;
-               dc[1] = dc[0] + 1.0 - 2.0 * cos(f) * q;
+               
+               c0 = q * q;
+               c1 = c0 + 1.0 - 2.0 * cos(f) * q;
+               dc[0] = c0;
+               dc[1] = c1;
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               a0 = c1;
+               b1 = 1 + c0 - c1;
+               b2 = -c0;
+               dc[2] = a0;
+               dc[3] = a0 * b1;
+               dc[4] = 0;
+               dc[5] = a0;
+               dc[6] = b2;
+               dc[7] = b2 * b1;
+               dc[8] = b1;
+               dc[9] = b1 * b1 + b2;
+#endif
        }
 }
 
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+// SIMD optimization (double * 2)
+/*
+ * Whole-buffer form of the LPF12_2 filter, two samples per iteration
+ * (SSE2, double). Filters count samples of sp in place.
+ *   dc[2..9] : two-sample-step coefficients, loaded as four 2-lane vectors
+ *              from dc+2, dc+4, dc+6 and dc+8.
+ *   db[2..3] : the two most recent outputs (older one first), updated for
+ *              the next call.
+ * When count is odd the last sample is processed on its own, so sp[count]
+ * is never read or written.
+ */
+static inline void buffer_filter_LPF12_2(FILTER_T *dc, FILTER_T *db, DATA_T *sp, int32 count)
+{
+       int32 i;
+       __m128d vcx0 = _mm_loadu_pd(dc + 2);
+       __m128d vcx1 = _mm_loadu_pd(dc + 4);
+       __m128d vcym2 = _mm_loadu_pd(dc + 6);
+       __m128d vcym1 = _mm_loadu_pd(dc + 8);
+       __m128d vy = _mm_loadu_pd(db + 2);
+       __m128d vym2 = _mm_unpacklo_pd(vy, vy);
+       __m128d vym1 = _mm_unpackhi_pd(vy, vy);
+
+       // full pairs only; a trailing odd sample is handled below
+       for (i = 0; i + 1 < count; i += 2) {
+               __m128d vin = _mm_loadu_pd(sp + i);
+               __m128d vx0 = _mm_unpacklo_pd(vin, vin);
+               __m128d vx1 = _mm_unpackhi_pd(vin, vin);
+               vy = MM_FMA4_PD(vcx0, vx0,  vcx1, vx1,  vcym2, vym2,  vcym1, vym1);
+               _mm_storeu_pd(sp + i, vy);
+               vym2 = _mm_unpacklo_pd(vy, vy);
+               vym1 = _mm_unpackhi_pd(vy, vy);
+       }
+       if (i < count) {
+               // exactly one sample left: apply lane 0 of the coefficient vectors
+               // (dc[4], lane 0 of vcx1, is 0 and only scales the absent second input)
+               FILTER_T ym2 = _mm_cvtsd_f64(vym2);
+               FILTER_T ym1 = _mm_cvtsd_f64(vym1);
+               FILTER_T y0 = (dc[2] * sp[i]) + (dc[6] * ym2 + dc[8] * ym1);
+               sp[i] = y0;
+               vy = _mm_set_pd(y0, ym1); // low lane = previous output, high lane = newest
+       }
+       _mm_storeu_pd(db + 2, vy);
+}
+
+#else // scalar
+/*
+ * Whole-buffer form of the LPF12_2 filter, one sample per iteration.
+ * Filters count samples of sp in place. dc[0] and dc[1] are the
+ * coefficients; db[0] and db[1] hold the filter state and are updated
+ * for the next call.
+ */
+static inline void buffer_filter_LPF12_2(FILTER_T *dc, FILTER_T *db, DATA_T *sp, int32 count)
+{
+       const FILTER_T k0 = dc[0];
+       const FILTER_T k1 = dc[1];
+       FILTER_T s0 = db[0];
+       FILTER_T s1 = db[1];
+       DATA_T *cur = sp;
+       int32 left;
+
+       for (left = count; left > 0; left--, cur++) {
+               s1 += (*cur - s0) * k1;
+               s0 += s1;
+               *cur = s0;
+               s1 *= k0;
+       }
+       /* keep the state for the next buffer */
+       db[0] = s0;
+       db[1] = s1;
+}
+
+#endif // (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+
 static inline void sample_filter_LPF24_2(FILTER_T *dc, FILTER_T *db, DATA_T *sp)
 {
        db[0] = *sp;
@@ -3624,6 +3699,13 @@ inline void buffer_filter(FilterCoefficients *fc, DATA_T *sp, int32 count)
 #endif
        if(!fc->type)
                return; // filter none
+
+       if (fc->type == FILTER_LPF12_2) {
+               recalc_filter_LPF12_2(fc);
+               buffer_filter_LPF12_2(fc->dc, &fc->db[FILTER_FB_L], sp, count);
+               return;
+       }
+       
        fc->recalc_filter(fc);
        for(i = 0; i < count; i++)
                fc->sample_filter(fc->dc, &fc->db[FILTER_FB_L], &sp[i]);
@@ -4462,8 +4544,8 @@ void apply_fir_eq(FIR_EQ *fc, DATA_T *buf, int32 count)
                                vout[1] = MM_FMA_PD(vdc, _mm_loadu_pd(&fc->buff[1][ofs]), vout[1]); // out[1] += fc->dc[j] * fc->buff[1][ofs];
                        }
                        // vout[0](L0,L1) vout[1](R0,R1)
-                       tmp[0] = _mm_shuffle_pd(vout[0], vout[1], 0x0); // (L0,R0)
-                       tmp[1] = _mm_shuffle_pd(vout[0], vout[1], 0x3); // (L1,R1)
+                       tmp[0] = _mm_unpacklo_pd(vout[0], vout[1]); // (L0,R0)
+                       tmp[1] = _mm_unpackhi_pd(vout[0], vout[1]); // (L1,R1)
                        tmp[0] = _mm_add_pd(tmp[0], tmp[1]); // (L0+L1,R0+R1)
                        _mm_store_pd(&buf[i], tmp[0]); // buf[i] = out[0]; buf[i + 1] = out[1];
                }
index d73d885..34fe29c 100644 (file)
@@ -532,11 +532,11 @@ static void apply_bank_parameter(Instrument *ip, ToneBankElement *tone)
                for (i = 0; i < ip->samples; i++) {
                        sp = &ip->sample[i];
                        if (tone->tunenum == 1) {
-                               sp->root_freq = adjust_tune_freq(sp->root_freq, tone->tune[0]);
-                               sp->tune *= pow(2.0, (double)tone->tune[0] * DIV_12); // use int_synth
+                       //      sp->root_freq = adjust_tune_freq(sp->root_freq, tone->tune[0]);
+                               sp->tune *= pow(2.0, (double)tone->tune[0] * DIV_12);
                        } else if (i < tone->tunenum) {
-                               sp->root_freq = adjust_tune_freq(sp->root_freq, tone->tune[i]);
-                               sp->tune *= pow(2.0, (double)tone->tune[i] * DIV_12); // use int_synth
+                       //      sp->root_freq = adjust_tune_freq(sp->root_freq, tone->tune[i]);
+                               sp->tune *= pow(2.0, (double)tone->tune[i] * DIV_12);
                        }
                }
        if (tone->envratenum)
@@ -1174,13 +1174,11 @@ fail:
                /* convert freq to key */       
                {
                        int32 freq1, freq2;
-                       int k;
-                       
-                       sp->root_freq = root_freq; // \83\8b\81[\83g\83L\81[\8eü\94g\90\94(freq_table[sp->root_key])\82Æ\82Ì\94ä(tune)\82ª\8aÜ\82Ü\82ê\82Ä\82¢\82é                      
+                       int k;                  
+                                       
                        sp->low_key = 0;
                        sp->high_key = 127;
                        sp->root_key = 60;
-                       sp->tune = 1.0;
                        for(k = 0; k < 128; k++){
                                if(k == 0){
                                        freq1 = 0;
@@ -1199,7 +1197,13 @@ fail:
                                if(root_freq >= freq1 && root_freq < freq2)
                                        sp->root_key = k;
                        }
-               //      sp->tune = (FLOAT_T)freq_table[sp->root_key] / (FLOAT_T)root_freq; // 
+#if 1 // c219 \83\8b\81[\83g\83L\81[\8eü\94g\90\94\82Ætune\82ð\95ª\97£
+                       sp->root_freq = freq_table[sp->root_key];
+                       sp->tune = (FLOAT_T)sp->root_freq / (FLOAT_T)root_freq;                 
+#else // root_freq\82Í\83\8b\81[\83g\83L\81[\8eü\94g\90\94(freq_table[sp->root_key])\82Æ\82Ì\94ä(tune)\82ª\8aÜ\82Ü\82ê\82Ä\82¢\82é
+                       sp->root_freq = root_freq;      
+                       sp->tune = 1.0;
+#endif
                        ctl->cmsg(CMSG_INFO, VERB_DEBUG, "Rate=%d LK=%d HK=%d RK=%d RF=%d Tune=%f",
                                        sp->sample_rate, sp->low_key, sp->high_key, sp->root_key, sp->root_freq, sp->tune);
                }
index a136263..2c77c44 100644 (file)
@@ -528,7 +528,10 @@ void mix_voice(DATA_T *buf, int v, int32 c)
        if (delay_cnt) {
                if(delay_cnt == c)
                        return;
-               else if (play_mode->encoding & PE_MONO)
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               delay_cnt &= ~(0x1); // for filter SIMD optimaize (filter.c buffer_filter()
+#endif
+               if (play_mode->encoding & PE_MONO)
                        buf += delay_cnt;
                else
                        buf += delay_cnt * 2;
index e18a1b1..8169418 100755 (executable)
@@ -696,7 +696,9 @@ LSU : Unalignment (use loadu/storeu
 #if (USE_X86_EXT_INTRIN >= 9)
 #define MM256_FMA_PD(vec_a, vec_b, vec_c) _mm256_fmadd_pd(vec_a, vec_b, vec_c)
 #define MM256_FMA2_PD(vec_a, vec_b, vec_c, vec_d) _mm256_fmadd_pd(vec_a, vec_b, _mm256_mul_pd(vec_c, vec_d))
-#define MM256_FMA3_PD(v00, v01, v10, v11, v20, v21) _mm256_fmadd_pd(v20, v21, _mm256_fmadd_pd(v10, v11, _mm256_mul_pd(v00, v01))
+#define MM256_FMA3_PD(v00, v01, v10, v11, v20, v21) _mm256_fmadd_pd(v20, v21, _mm256_fmadd_pd(v10, v11, _mm256_mul_pd(v00, v01)))
+#define MM256_FMA4_PD(v00, v01, v10, v11, v20, v21, v30, v31) _mm256_add_pd(\
+       _mm256_fmadd_pd(v30, v31, _mm256_mul_pd(v20, v21)), _mm256_fmadd_pd(v10, v11, _mm256_mul_pd(v00, v01)) )
 #define MM256_LS_FMA_PD(ptr, vec_a, vec_b) _mm256_store_pd(ptr, _mm256_fmadd_pd(vec_a, vec_b, _mm256_load_pd(ptr)))
 #define MM256_LSU_FMA_PD(ptr, vec_a, vec_b) _mm256_storeu_pd(ptr, _mm256_fmadd_pd(vec_a, vec_b, _mm256_loadu_pd(ptr)))
 #define MM256_MSUB_PD(vec_a, vec_b, vec_c) _mm256_fmsub_pd(vec_a, vec_b, vec_c)
@@ -711,6 +713,8 @@ LSU : Unalignment (use loadu/storeu
 #define MM256_FMA2_PD(vec_a, vec_b, vec_c, vec_d) _mm256_add_pd(_mm256_mul_pd(vec_a, vec_b), _mm256_mul_pd(vec_c, vec_d))
 #define MM256_FMA3_PD(v00, v01, v10, v11, v20, v21) _mm256_add_pd(\
        _mm256_add_pd(_mm256_mul_pd(v00, v01),_mm256_mul_pd(v10, v11)), _mm256_mul_pd(v20, v21))
+#define MM256_FMA4_PD(v00, v01, v10, v11, v20, v21, v30, v31) _mm256_add_pd(\
+       _mm256_add_pd(_mm256_mul_pd(v00, v01),_mm256_mul_pd(v10, v11)), _mm256_add_pd(_mm256_mul_pd(v20, v21),_mm256_mul_pd(v30, v31)))
 #define MM256_LS_FMA_PD(ptr, vec_a, vec_b) _mm256_store_pd(ptr, _mm256_add_pd(_mm256_load_pd(ptr), _mm256_mul_pd(vec_a, vec_b)))
 #define MM256_LSU_FMA_PD(ptr, vec_a, vec_b) _mm256_storeu_pd(ptr, _mm256_add_pd(_mm256_loadu_pd(ptr), _mm256_mul_pd(vec_a, vec_b)))
 #define MM256_MSUB_PD(vec_a, vec_b, vec_c) _mm256_sub_pd(_mm256_mul_pd(vec_a, vec_b), vec_c)
@@ -758,7 +762,7 @@ LSU : Unalignment (use loadu/storeu
 #define MM_FMA3_PD(v00, v01, v10, v11, v20, v21) _mm_add_pd(\
        _mm_add_pd(_mm_mul_pd(v00, v01),_mm_mul_pd(v10, v11)), _mm_mul_pd(v20, v21) )
 #define MM_FMA4_PD(v00, v01, v10, v11, v20, v21, v30, v31) _mm_add_pd(\
-       _mm_add_pd(_mm_mul_pd(v00, v01),_mm_mul_pd(v10, v11)), _mm_add_pd(_mm_mul_pd(v20, v21),_mm_mul_pd(v30, v31))))
+       _mm_add_pd(_mm_mul_pd(v00, v01),_mm_mul_pd(v10, v11)), _mm_add_pd(_mm_mul_pd(v20, v21),_mm_mul_pd(v30, v31)))
 #define MM_FMA5_PD(v00, v01, v10, v11, v20, v21, v30, v31, v40, v41) _mm_add_pd(_mm_add_pd(\
        _mm_add_pd(_mm_mul_pd(v00, v01),_mm_mul_pd(v10, v11)), _mm_add_pd(_mm_mul_pd(v20, v21),_mm_mul_pd(v30, v31)))\
        , _mm_mul_pd(v40, v41))
@@ -832,11 +836,20 @@ LSU : Unalignment (use loadu/storeu
 #define MM_EXTRACT_F32(reg,idx) _mm_cvtss_f32(_mm_shuffle_ps(reg,reg,idx))
 #define MM_EXTRACT_F64(reg,idx) _mm_cvtsd_f64(_mm_shuffle_pd(reg,reg,idx))
 #define MM_EXTRACT_I32(reg,idx) _mm_cvtsi128_si32(_mm_shuffle_epi32(reg,idx))
+// MM256_EXTRACT_F32/F64(reg,idx) : read element idx of a 256bit float/double vector as a scalar
+#if (USE_X86_EXT_INTRIN >= 9)
+// _mm256_permutevar8x32_ps takes its index as a vector, so idx is broadcast first
+#define MM256_EXTRACT_F32(reg,idx) _mm256_cvtss_f32(_mm256_permutevar8x32_ps(reg,_mm256_set1_epi32(idx)))
+#define MM256_EXTRACT_F64(reg,idx) _mm256_cvtsd_f64(_mm256_permute4x64_pd(reg,idx))
+#else
+// pick the 128bit half that holds idx, then move that element to lane 0
+#define MM256_EXTRACT_F32(reg,idx) _mm_cvtss_f32(_mm_permute_ps(_mm256_extractf128_ps(reg, (idx) >= 4), (idx) % 4))
+// reg is __m256d here : the _pd form of extractf128 is required
+#define MM256_EXTRACT_F64(reg,idx) _mm_cvtsd_f64(_mm_permute_pd(_mm256_extractf128_pd(reg, (idx) >= 2), (idx) % 2))
+#endif
 #define MM256_EXTRACT_I32(reg,idx) _mm256_extract_epi32(reg,idx)
 #else
 #define MM_EXTRACT_F32(reg,idx) reg.m128_f32[idx]
 #define MM_EXTRACT_F64(reg,idx) reg.m128d_f64[idx]
 #define MM_EXTRACT_I32(reg,idx) reg.m128i_i32[idx]
+#define MM256_EXTRACT_F32(reg,idx) reg.m256_f32[idx]
+#define MM256_EXTRACT_F64(reg,idx) reg.m256d_f64[idx]
 #define MM256_EXTRACT_I32(reg,idx) reg.m256i_i32[idx]
 #endif
 #endif // (USE_X86_EXT_INTRIN >= 1)
index ad71194..1163ad0 100644 (file)
@@ -7216,7 +7216,7 @@ static void process_sysex_event(int ev, int ch, int val, int b)
                        case 0x52:
                        case 0x54:
                                temp = (b - 0x42) / 2;
-                               ctl->cmsg(CMSG_INFO, VERB_NOISY, "XG Variation %d Parameter MSB (%d)", msb + 1, temp, val);
+                               ctl->cmsg(CMSG_INFO, VERB_NOISY, "XG Variation %d Parameter MSB %d (%d)", msb + 1, temp, val);
                                if (variation_effect_xg[msb].set_param_msb[temp] != val) {
                                        variation_effect_xg[msb].set_param_msb[temp] = val;
                                        recompute_effect_xg(&variation_effect_xg[msb], 1);
@@ -7233,7 +7233,7 @@ static void process_sysex_event(int ev, int ch, int val, int b)
                        case 0x53:
                        case 0x55:
                                temp = (b - 0x43) / 2;
-                               ctl->cmsg(CMSG_INFO, VERB_NOISY, "XG Variation %d Parameter LSB (%d)", msb + 1, temp, val);
+                               ctl->cmsg(CMSG_INFO, VERB_NOISY, "XG Variation %d Parameter LSB %d (%d)", msb + 1, temp, val);
                                if (variation_effect_xg[msb].set_param_lsb[temp] != val) {
                                        variation_effect_xg[msb].set_param_lsb[temp] = val;
                                        recompute_effect_xg(&variation_effect_xg[msb], 1);
@@ -13555,7 +13555,7 @@ static int play_midi_load_file(char *fn,
        file_from_stdin = 0;
 
     ctl_mode_event(CTLE_NOW_LOADING, 0, (ptr_size_t)fn, 0);
-    ctl->cmsg(CMSG_INFO, VERB_VERBOSE, "MIDI file: %s", fn);
+    ctl->cmsg(CMSG_INFO, VERB_NORMAL, "MIDI file: %s", fn);
     if((tf = open_midi_file(fn, 1, OF_VERBOSE)) == NULL)
     {
        ctl_mode_event(CTLE_LOADING_DONE, 0, -1, 0);
index 89242aa..7b2e394 100644 (file)
@@ -5349,7 +5349,7 @@ do_linear:
 #endif
 }
 
-#if 0 //(USE_X86_EXT_INTRIN >= 9) // \96¢\83e\83X\83\93®\82­\82©\82Í\95s\96¾ broadcast\82Íset1\82¾\82Á\82½\82©\82à\81E\81
+#if (USE_X86_EXT_INTRIN >= 9)
 // offset:int32*8, resamp:float*8
 // \83\8b\81[\83v\93à\95\94\82Ìoffset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é , (sample_increment * (req_count+1)) < int32 max
 static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req_count, int32 *out_count)
@@ -5362,24 +5362,24 @@ static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req
        sample_t *src = vp->sample->data + (prec_offset >> FRACTION_BITS);
        const int32 start_offset = (int32)(resrc->offset - prec_offset); // offset\8cv\8eZ\82ðint32\92l\88æ\82É\82·\82é(SIMD\97p
        const int32 inc = resrc->increment;
-       const __m256i vinc = _mm256_broadcastd_epi32(inc * 8), vfmask = _mm256_broadcastd_epi32((int32)FRACTION_MASK);
-       __m256i vofs = _mm256_add_epi32(_mm256_broadcastd_epi32(start_offset), _mm256_set_epi32(inc*7,inc*6,inc*5,inc*4,inc*3,inc*2,inc,0));
-       const __m256 vdivf = _mm256_broadcastd_ps(div_fraction);        
-       const __m256 vfrac_6 = _mm256_broadcastd_ps(div_fraction * DIV_6);
-       const __m256 vfrac_2 = _mm256_broadcastd_ps(div_fraction * DIV_2);
-       const __m256 v3n = _mm256_broadcastd_ps(-3);
-       const __m256 v3p = _mm256_broadcastd_ps(3);
-       const __m256i vfrac = _mm256_broadcastd_epi32(mlt_fraction);
-       const __m256i vfrac2 = _mm256_broadcastd_epi32(ml2_fraction);
-       const __m256 vec_divo = _mm256_broadcastd_ps(DIV_15BIT);
+       const __m256i vinc = _mm256_set1_epi32(inc * 8), vfmask = _mm256_set1_epi32((int32)FRACTION_MASK);
+       __m256i vofs = _mm256_add_epi32(_mm256_set1_epi32(start_offset), _mm256_set_epi32(inc*7,inc*6,inc*5,inc*4,inc*3,inc*2,inc,0));
+       const __m256 vdivf = _mm256_set1_ps(div_fraction);      
+       const __m256 vfrac_6 = _mm256_set1_ps(div_fraction * DIV_6);
+       const __m256 vfrac_2 = _mm256_set1_ps(div_fraction * DIV_2);
+       const __m256 v3n = _mm256_set1_ps(-3);
+       const __m256 v3p = _mm256_set1_ps(3);
+       const __m256i vfrac = _mm256_set1_epi32(mlt_fraction);
+       const __m256i vfrac2 = _mm256_set1_epi32(ml2_fraction);
+       const __m256 vec_divo = _mm256_set1_ps(DIV_15BIT);
 #ifdef LAO_OPTIMIZE_INCREMENT
        // \8dÅ\93K\89»\83\8c\81[\83g = (\83\8d\81[\83h\83f\81[\83^\90\94 - \8f\89\8aú\83I\83t\83Z\83b\83g\8f¬\90\94\95\94\82Ì\8dÅ\91å\92l(1\96¢\96\9e) - \95â\8aÔ\83|\83C\83\93\83g\90\94(lagrange\82Í3) ) / \83I\83t\83Z\83b\83g\83f\81[\83^\90\94
        // \83\8d\81[\83h\83f\81[\83^\90\94\82Íint16\97ppermutevar\82ª\82È\82¢\82Ì\82Å\95Ï\8a·\8cã\82Ì32bit(int32/float)\82Ì8\83Z\83b\83g\82É\82È\82é
        const int32 opt_inc1 = (1 << FRACTION_BITS) * (8 - 1 - 3) / 8; // (float*8) * 1\83Z\83b\83g
        if(inc < opt_inc1){     // 1\83Z\83b\83g
-       const __m256i vvar1n = _mm256_broadcastd_epi32(-1);
-       const __m256i vvar1 = _mm256_broadcastd_epi32(1);
-       const __m256i vvar2 = _mm256_broadcastd_epi32(2);
+       const __m256i vvar1n = _mm256_set1_epi32(-1);
+       const __m256i vvar1 = _mm256_set1_epi32(1);
+       const __m256i vvar2 = _mm256_set1_epi32(2);
        for(i = 0; i < count; i += 8) {
        __m256i vofsi2 = _mm256_srli_epi32(vofs, FRACTION_BITS); // ofsi
        __m256i vofsi1 = _mm256_add_epi32(vofsi2, vvar1n); // ofsi-1
@@ -5393,12 +5393,12 @@ static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req
        __m256i vofsub3 = _mm256_sub_epi32(vofsi3, vofsib); 
        __m256i vofsub4 = _mm256_sub_epi32(vofsi4, vofsib);
        __m256 vvf1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vin1)); // int16 to float (i16*8->i32*8->f32*8
-       __m256 vv1 = _mm256_permutevar8x32_ps(vvf1, vofsub1); // v1 ofsi-1
-       __m256 vv2 = _mm256_permutevar8x32_ps(vvf1, vofsub2); // v2 ofsi
-       __m256 vv3 = _mm256_permutevar8x32_ps(vvf1, vofsub3); // v2 ofsi+1
-       __m256 vv4 = _mm256_permutevar8x32_ps(vvf1, vofsub4); // v2 ofsi+2
+       __m256 vv0 = _mm256_permutevar8x32_ps(vvf1, vofsub1); // v1 ofsi-1
+       __m256 vv1 = _mm256_permutevar8x32_ps(vvf1, vofsub2); // v2 ofsi
+       __m256 vv2 = _mm256_permutevar8x32_ps(vvf1, vofsub3); // v2 ofsi+1
+       __m256 vv3 = _mm256_permutevar8x32_ps(vvf1, vofsub4); // v2 ofsi+2
        // \82 \82Æ\82Í\92Ê\8fí\82Æ\93¯\82
-       __m256i vofsf = _mm_add_epi32(_mm_and_si128(vofs, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+       __m256i vofsf = _mm256_add_epi32(_mm256_and_si256(vofs, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
        __m256 vtmp = _mm256_sub_ps(vv1, vv0); // tmp = v[1] - v[0];
        __m256 vtmp1, vtmp2, vtmp3, vtmp4;
        vv3 = _mm256_add_ps(vv3, _mm256_sub_ps(MM256_FMA2_PS(vv2, v3n, vv1, v3p), vv0)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
@@ -5454,7 +5454,7 @@ static inline DATA_T *resample_lagrange_multi(Voice *vp, DATA_T *dest, int32 req
        __m256 vv1 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_2)); // int16 to float (16bit*8 -> 32bit*8 > float*8
        __m256 vv2 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_3)); // int16 to float (16bit*8 -> 32bit*8 > float*8
        __m256 vv3 = _mm256_cvtepi32_ps(_mm256_cvtepi16_epi32(vi16_4)); // int16 to float (16bit*8 -> 32bit*8 > float*8
-       __m256i vofsf = _mm_add_epi32(_mm_and_si128(vofs, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
+       __m256i vofsf = _mm256_add_epi32(_mm256_and_si256(vofs, vfmask), vfrac); // ofsf = (ofs & FRACTION_MASK) + mlt_fraction;
        __m256 vtmp = _mm256_sub_ps(vv1, vv0); // tmp = v[1] - v[0];
        __m256 vtmp1, vtmp2, vtmp3, vtmp4;
        vv3 = _mm256_add_ps(vv3, _mm256_sub_ps(MM256_FMA2_PS(vv2, v3n, vv1, v3p), vv0)); // v[3] += -3 * v[2] + 3 * v[1] - v[0];
index ab269ef..eaae8fc 100644 (file)
@@ -877,61 +877,64 @@ static Instrument *load_from_file(SFInsts *rec, InstList *ip)
 
 #if defined(SF2_24BIT) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT))
 #if 1 /* SF2_24BIT_SAMPLE_TYPE_FLOAT */
-                       if(sp->lowbit > 0 ){
-                               /* 24 bit */
-                               splen_t cnt;
-                               uint8 *lowbit;
-                               uint16 *highbit;
-                               float *tmp_data;
-
-                               frames = divi_2(sp->len);
-                               sample->data = (sample_t*)safe_large_malloc(sizeof(float) * (frames + 128));
-                               sample->data_alloced = 1;
-                               sample->data_type = SAMPLE_TYPE_FLOAT;
-                               highbit = (uint16 *)safe_large_malloc(sizeof(int16) * frames); // 16bit
-                               lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit                     
-                               tf_seek(tf, sp->start, SEEK_SET);
-                               tf_read(highbit, sp->len, 1, tf);
-                               tf_seek(tf, sp->lowbit, SEEK_SET);
-                               tf_read(lowbit, frames, 1, tf);
-                               tmp_data = (float *)sample->data;
-                               for(j = 0; j < frames; j++) {
-                                       // 24bit to int32full
-                                       int32 tmp_i = 0; // 1byte 00\82Å\82¢\82¢\82ç\82µ\82¢\81H
-                                       tmp_i |= (uint32)lowbit[j] << 8; // 2byte
-                                       tmp_i |= (uint32)highbit[j] << 16; // 3-4byte
+               if(sp->lowbit > 0 ){
+                   /* 24 bit */
+                   splen_t cnt;
+                   uint8 *lowbit;
+                       uint16 *highbit;
+                       float *tmp_data;
+
+                       frames = divi_2(sp->len);
+                   sample->data = (sample_t*)safe_large_malloc(sizeof(float) * (frames + 128));
+                   sample->data_alloced = 1;
+                       sample->data_type = SAMPLE_TYPE_FLOAT;
+                   highbit = (uint16 *)safe_large_malloc(sizeof(int16) * frames); // 16bit
+                   lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit                 
+                       tf_seek(tf, sp->start, SEEK_SET);
+                       tf_read(highbit, sp->len, 1, tf);
+                   tf_seek(tf, sp->lowbit, SEEK_SET);
+                   tf_read(lowbit, frames, 1, tf);
+                       tmp_data = (float *)sample->data;
+                   for(j = 0; j < frames; j++) {
+                               // 24bit to int32full
+                           int32 tmp_i = 0; // 1byte 00\82Å\82¢\82¢\82ç\82µ\82¢\81H
+                               tmp_i |= (uint32)lowbit[j] << 8; // 2byte
+                           tmp_i |= (uint32)highbit[j] << 16; // 3-4byte
 #ifndef LITTLE_ENDIAN
-                                       XCHG_LONG(tmp_i)
+                               XCHG_LONG(tmp_i)
 #endif
-                                       tmp_data[j] = (float)tmp_i * DIV_31BIT;
-                               }
-                               safe_free(highbit);
-                               safe_free(lowbit);
-                               /* set a small blank loop at the tail for avoiding abnormal loop. */    
-                               memset(&tmp_data[frames], 0, sizeof(float) * 128);
-                               if (antialiasing_allowed)
-                                 antialiasing_float((float *)sample->data, sample->data_length >> FRACTION_BITS, sample->sample_rate, play_mode->rate);
-                       }else
+                               tmp_data[j] = (float)tmp_i * DIV_31BIT;
+                   }
+                   safe_free(highbit);
+                   safe_free(lowbit);
+                       /* set a small blank loop at the tail for avoiding abnormal loop. */    
+                       memset(&tmp_data[frames], 0, sizeof(float) * 128);
+                       if (antialiasing_allowed)
+                         antialiasing_float((float *)sample->data, sample->data_length >> FRACTION_BITS, sample->sample_rate, play_mode->rate);
+               }else
 #else /* SF2_24BIT_SAMPLE_TYPE_INT32 */
-                       if(sp->lowbit > 0 ){
-                               /* 24 bit */
-                               splen_t cnt;
-                               uint8 *lowbit;
-                               uint16 *highbit;
-                               uint32 *tmp_data;
+               if(sp->lowbit > 0 ){
+                   /* 24 bit */
+                   splen_t cnt;
+                   uint8 *lowbit;
+                       uint16 *highbit;
+                       uint32 *tmp_data;
 
                                frames = divi_2(sp->len);
                                sample->data = (sample_t*)safe_large_malloc(sizeof(int32) * (frames + 128));
                                sample->data_alloced = 1;
                                sample->data_type = SAMPLE_TYPE_INT32;
+
                                highbit = (uint16 *)safe_large_malloc(sizeof(int16) * frames); // 16bit
-                               lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit                     
+                               lowbit = (uint8 *)safe_large_malloc(sizeof(int8) * frames); // 8bit
+
                                tf_seek(tf, sp->start, SEEK_SET);
                                tf_read(highbit, sp->len, 1, tf);
                                tf_seek(tf, sp->lowbit, SEEK_SET);
                                tf_read(lowbit, frames, 1, tf);
+
                                tmp_data = (uint32 *)sample->data;
-                               for(j = 0; j < frames; j++) {
+                               for (j = 0; j < frames; j++) {
                                        // 24bit to int32full
                                        uint32 tmp_i = 0; // 1byte 00\82Å\82¢\82¢\82ç\82µ\82¢\81H
                                        tmp_i |= (uint32)lowbit[j] << 8; // 2byte
@@ -939,18 +942,22 @@ static Instrument *load_from_file(SFInsts *rec, InstList *ip)
 #ifndef LITTLE_ENDIAN
                                        XCHG_LONG(tmp_i)
 #endif
-                                       tmp_data[j] = tmp_i;
+                                               tmp_data[j] = tmp_i;
                                }
                                safe_free(highbit);
                                safe_free(lowbit);
+
                                /* set a small blank loop at the tail for avoiding abnormal loop. */
                        //      tmp_data[frames] = tmp_data[frames + 1] = tmp_data[frames + 2] = 0;                     
                                memset(&tmp_data[frames], 0, sizeof(int32) * 128);
+
                                if (antialiasing_allowed)
-                                 antialiasing_int32((int32 *)sample->data, sample->data_length >> FRACTION_BITS, sample->sample_rate, play_mode->rate);
-                       }else
-#endif /* SF2_24BIT_SAMPLE_TYPE_FLOAT */
-#endif /* defined(SF2_24BIT) && (defined(DATA_T_DOUBLE) || defined(DATA_T_FLOAT)) */
+                                       antialiasing_int32((int32 *)sample->data, sample->data_length >> FRACTION_BITS, sample->sample_rate, play_mode->rate);
+
+                       }
+                       else
+#endif
+#endif
                        {
                                /* 16 bit */
                                frames = divi_2(sp->len);
@@ -974,7 +981,7 @@ static Instrument *load_from_file(SFInsts *rec, InstList *ip)
                                        antialiasing((int16 *)sample->data, sample->data_length >> FRACTION_BITS, sample->sample_rate, play_mode->rate);
                        }
                }
-///r
+
                /* resample it if possible */
                if (opt_pre_resamplation && sample->note_to_use && !(sample->modes & MODES_LOOPING))
                        pre_resample(sample);
index dc04821..5b09949 100644 (file)
@@ -72,6 +72,14 @@ static void do_compute_null(int thread_num){}
 static thread_func_t do_compute_func = do_compute_null;
 static int compute_thread_job = 0, compute_thread_job_cnt = 0;
 
+#if MULTI_THREAD_COMPUTE2
+static thread_func_t mtcs_func0 = do_compute_null;
+static thread_func_t mtcs_func1 = do_compute_null;
+static int mtcs_job_num0 = 0, mtcs_job_cnt0 = 0;
+static int mtcs_job_num1 = 0, mtcs_job_cnt1 = 0;
+static int8 mtcs_job_flg0[MTC2_JOB_MAX] = {0};
+static int8 mtcs_job_flg1[MTC2_JOB_MAX] = {0};
+#endif
 
 #if defined(MULTI_THREAD_COMPUTE) && defined(__W32__)
 
@@ -99,27 +107,52 @@ void set_compute_thread_priority(DWORD var)
 // OS\83f\83t\83H\83\8b\83g\82Å\82Í1~15\82Ì\8ew\92è\8e\9e 15.6ms 
 #endif
 
+
 static void compute_thread_core(int thread_num)
-{              
-       if(compute_thread_job <= compute_thread_ready){
-               if(thread_num < compute_thread_job)
-                       do_compute_func(thread_num);
-       }else{
-#if 1 // load_balance // \8bó\82¢\82Ä\82é\83X\83\8c\83b\83h\82É\83W\83\87\83u\8a\84\82è\93\96\82Ä
-               for(;;){
-                       int job_num;
+{      
+#if MULTI_THREAD_COMPUTE2
+       for(;;){
+               int job_num, job_nums0 = 0, job_nums1 = 0;
+               EnterCriticalSection(&critThread); // single thread ~
+               job_num = (compute_thread_job_cnt++);
+               LeaveCriticalSection(&critThread); // ~ single thread
+               if(job_num < compute_thread_job)
+                       do_compute_func(job_num);
+               if(mtcs_job_num0){
                        EnterCriticalSection(&critThread); // single thread ~
-                       job_num = (compute_thread_job_cnt++);
+                       job_nums0 = (mtcs_job_cnt0++);
                        LeaveCriticalSection(&critThread); // ~ single thread
-                       if(job_num >= compute_thread_job) break;
-                       do_compute_func(job_num);
+                       if(job_nums0 < mtcs_job_num0){
+                               mtcs_func0(job_nums0);
+                               mtcs_job_flg0[job_nums0] = 0;
+                       }
                }
+               if(mtcs_job_num1){
+                       EnterCriticalSection(&critThread); // single thread ~
+                       job_nums1 = (mtcs_job_cnt1++);
+                       LeaveCriticalSection(&critThread); // ~ single thread
+                       if(job_nums1 < mtcs_job_num1){
+                               mtcs_func1(job_nums1);
+                               mtcs_job_flg1[job_nums1] = 0;
+                       }
+               }
+               if(job_num >= compute_thread_job && job_nums0 >= mtcs_job_num0 && job_nums1 >= mtcs_job_num1)
+                       break;
+       }
+#elif 1 // load_balance // \8bó\82¢\82Ä\82é\83X\83\8c\83b\83h\82É\83W\83\87\83u\8a\84\82è\93\96\82Ä
+       for(;;){
+               int job_num;
+               EnterCriticalSection(&critThread); // single thread ~
+               job_num = (compute_thread_job_cnt++);
+               LeaveCriticalSection(&critThread); // ~ single thread
+               if(job_num >= compute_thread_job) break;
+               do_compute_func(job_num);
+       }
 #else // normal // \83X\83\8c\83b\83h\82É\8bÏ\93\99\82É\83W\83\87\83u\8a\84\82è\93\96\82Ä
-               int i;
-               for (i = thread_num; i < compute_thread_job; i += compute_thread_ready)
-                       do_compute_func(i);
+       int i;
+       for (i = thread_num; i < compute_thread_job; i += compute_thread_ready)
+               do_compute_func(i);
 #endif
-       }
 }
 
 static void WINAPI ComputeThread(void *arglist)
@@ -136,6 +169,57 @@ static void WINAPI ComputeThread(void *arglist)
        crt_endthread();
 }
 
+#ifdef MULTI_THREAD_COMPUTE2
+// Block the caller until every completion flag of one sub-job channel has been cleared.
+// ptr: first byte of the flag array; exactly 16 bytes (two 64-bit words) are polled.
+// NOTE(review): the flags are read through a plain (non-volatile) uint64 pointer, so this
+// relies on THREAD_WAIT_MAIN forcing a fresh read on every pass and on ptr being suitably
+// aligned for 64-bit access -- confirm both against the macro and the array declaration.
+static inline void compute_thread_sub_wait(int8 *ptr)
+{
+// (MTC2_JOB_MAX == 16)
+       // compare the flag bytes 8 at a time, as 64-bit words
+       uint64 *ptr1 = (uint64 *)ptr;
+       while(ptr1[0] || ptr1[1])
+               THREAD_WAIT_MAIN
+}
+
+// Run `num` sub-jobs of `fnc` on sub-job channel 0 and return once all of them have finished.
+// fnc: job function; it is invoked with each job index below `num`.
+// num: number of jobs, 1 .. MTC2_JOB_MAX.
+// Returns without running anything when compute_thread_job is zero, when channel 0 is
+// already busy (mtcs_job_num0 != 0), or when the arguments are invalid.
+void go_compute_thread_sub0(thread_func_t fnc, int num)
+{
+       int i;
+       if(!compute_thread_job)
+               return; // error
+       if(mtcs_job_num0)
+               return; // error
+       // compute_thread_sub_wait() polls exactly MTC2_JOB_MAX (16) flag bytes, so a larger
+       // job count would set flags outside the range that is waited on.
+       if(!fnc || num < 1 || num > MTC2_JOB_MAX)
+               return; // error
+       for(i = 0; i < num; i++)
+               mtcs_job_flg0[i] = 1; // 1 = job pending; cleared by whichever thread runs job i
+       mtcs_func0 = fnc;
+       mtcs_job_cnt0 = 0;
+       mtcs_job_num0 = num; // start flag
+       compute_thread_core(0); // presumably lets the calling thread take jobs as well
+       compute_thread_sub_wait(mtcs_job_flg0); // wait for jobs picked up by other threads
+       mtcs_job_num0 = 0; // end flag
+       mtcs_func0 = do_compute_null;
+}
+
+// Run `num` sub-jobs of `fnc` on sub-job channel 1 and return once all of them have finished.
+// fnc: job function; it is invoked with each job index below `num`.
+// num: number of jobs, 1 .. MTC2_JOB_MAX.
+// Returns without running anything when compute_thread_job is zero, when channel 1 is
+// already busy (mtcs_job_num1 != 0), or when the arguments are invalid.
+void go_compute_thread_sub1(thread_func_t fnc, int num)
+{
+       int i;
+       if(!compute_thread_job)
+               return; // error
+       if(mtcs_job_num1)
+               return; // error
+       // compute_thread_sub_wait() polls exactly MTC2_JOB_MAX (16) flag bytes, so a larger
+       // job count would set flags outside the range that is waited on.
+       if(!fnc || num < 1 || num > MTC2_JOB_MAX)
+               return; // error
+       for(i = 0; i < num; i++)
+               mtcs_job_flg1[i] = 1; // 1 = job pending; cleared by whichever thread runs job i
+       mtcs_func1 = fnc;
+       mtcs_job_cnt1 = 0;
+       mtcs_job_num1 = num; // start flag
+       compute_thread_core(0); // presumably lets the calling thread take jobs as well
+       compute_thread_sub_wait(mtcs_job_flg1); // wait for jobs picked up by other threads
+       mtcs_job_num1 = 0; // end flag
+       mtcs_func1 = do_compute_null;
+}
+#endif // MULTI_THREAD_COMPUTE2
+
 static inline void compute_thread_wait(void)
 {      
 // (MAX_THREADS == 16)
@@ -160,6 +244,16 @@ void go_compute_thread(thread_func_t fnc, int num)
        do_compute_func = fnc;
        compute_thread_job = num;
        compute_thread_job_cnt = 0;
+#ifdef MULTI_THREAD_COMPUTE2
+       mtcs_func0 = do_compute_null;
+       mtcs_job_num0 = 0;
+       mtcs_job_cnt0 = 0;
+       mtcs_func1 = do_compute_null;
+       mtcs_job_num1 = 0;
+       mtcs_job_cnt1 = 0;
+       memset(mtcs_job_flg0, 0, sizeof(mtcs_job_flg0));
+       memset(mtcs_job_flg1, 0, sizeof(mtcs_job_flg1));
+#endif // MULTI_THREAD_COMPUTE2
 #if (USE_X86_EXT_INTRIN >= 3) && (MAX_THREADS == 16)
        _mm_store_si128((__m128i *)thread_finish, _mm_setzero_si128());  // \83X\83\8c\83b\83h\8fI\97¹\83t\83\89\83O\83\8a\83Z\83b\83g
        for(i = 0; i < thread; i++)
@@ -215,6 +309,12 @@ void terminate_compute_thread(void)
        
        thread_exit = 1;
        compute_thread_job = 0; // \83f\83b\83h\83\8d\83b\83N\91Î\8dô
+#ifdef MULTI_THREAD_COMPUTE2
+       mtcs_job_num0 = 0; // \83f\83b\83h\83\8d\83b\83N\91Î\8dô
+       memset(mtcs_job_flg0, 0, sizeof(mtcs_job_flg0));
+       mtcs_job_num1 = 0; // \83f\83b\83h\83\8d\83b\83N\91Î\8dô
+       memset(mtcs_job_flg1, 0, sizeof(mtcs_job_flg1));
+#endif // MULTI_THREAD_COMPUTE2
        for(i = 0; i < (MAX_THREADS - 1); i++){
                if(hComputeThread[i] == NULL)
                        continue;               
index 3b36056..e779ecc 100644 (file)
@@ -9,6 +9,7 @@
 #ifdef __W32__
 #if defined(ENABLE_THREAD) // && (defined(IA_W32GUI) || defined(IA_W32G_SYN) || defined(KBTIM) || defined(TIM_CUI))
 #define MULTI_THREAD_COMPUTE 1
+#define MULTI_THREAD_COMPUTE2 1 // sub thread
 #endif
 #endif /* __W32__ */
 
@@ -46,6 +47,12 @@ extern void terminate_compute_thread(void);
 extern void reset_compute_thread(void);
 extern void go_compute_thread(thread_func_t fnc, int num);
 
+#ifdef MULTI_THREAD_COMPUTE2
+// Sub-job interface: two independent channels (0 and 1) for running extra jobs.
+#define MTC2_JOB_MAX 16 // sub-job limit per channel; compute_thread_sub_wait() in thread.c is written for exactly 16 flag bytes
+// Run `num` jobs of `fnc` on channel 0 / channel 1 and return when they are done.
+extern void go_compute_thread_sub0(thread_func_t fnc, int num);
+extern void go_compute_thread_sub1(thread_func_t fnc, int num);
+#endif // MULTI_THREAD_COMPUTE2
+
 #define CDM_JOB_NUM 16
 // 13 <= threads , thread_paymidi.c cdm_job_num
 // thread_mix.c voice_buffer_thread[
@@ -60,10 +67,12 @@ extern void compute_voice_scc_thread(int v, DATA_T *ptr, int32 count, int thread
 extern void compute_voice_mms_thread(int v, DATA_T *ptr, int32 count, int thread);
 
 
-#if 0 // test
-extern void do_compute_effect_thread1(int thread_num);
-#endif
-
+#ifdef MULTI_THREAD_COMPUTE2
+// Callback run for each effect sub-job: receives the job index and the user pointer it was registered with.
+typedef void (*effect_sub_thread_func_t)(int thread_num, void *ptr);
+// Register (func, ptr) in a free slot; returns 0 on success, 1 if it cannot be registered.
+extern int set_effect_sub_thread(effect_sub_thread_func_t func, void *ptr, int num);
+// Unregister (func, ptr); (NULL, NULL) is the request to clear every slot.
+extern void reset_effect_sub_thread(effect_sub_thread_func_t func, void *ptr);
+// Run `num` sub-jobs of a previously registered (func, ptr) pair.
+extern void go_effect_sub_thread(effect_sub_thread_func_t func, void *ptr, int num);
+#endif // MULTI_THREAD_COMPUTE2
 
 
 #endif // MULTI_THREAD_COMPUTE
index 38f31fa..8201594 100644 (file)
@@ -1839,6 +1839,9 @@ void do_master_effect_thread(void)
 #ifdef TEST_FIR_EQ
        apply_fir_eq(&test_fir_eq, master_effect_buffer_thread[cdmt_buf_o], me_cv[cdmt_buf_o].nsamples);
 #endif
+#ifdef TEST_FX
+       test_fx(master_effect_buffer_thread[cdmt_buf_o], me_cv[cdmt_buf_o].nsamples);
+#endif
        // elion add.
        mix_compressor(master_effect_buffer_thread[cdmt_buf_o], me_cv[cdmt_buf_o].nsamples);
        if(noise_sharp_type)
@@ -1869,9 +1872,79 @@ void do_effect_thread(DATA_T *buf, int32 count, int32 byte)
 
 
 
+/************************************ effect_sub_thread ***************************************/
+
+#ifdef MULTI_THREAD_COMPUTE2
+
+// Two registration slots for effect sub-thread callbacks.
+// A slot is free while its function pointer is NULL; est_ptrN is the user pointer
+// handed back to est_funcN on every call.
+static effect_sub_thread_func_t est_func0 = NULL;
+static effect_sub_thread_func_t est_func1 = NULL;
+static void *est_ptr0 = NULL;
+static void *est_ptr1 = NULL;
 
+// Register the callback pair (func, ptr) in one of the two effect sub-thread slots.
+// Returns 0 when the pair is registered (or already was), 1 when it cannot be used:
+// compute_thread_ready is below 4, func is NULL, or both slots are taken.
+// `num` is accepted but not used here.
+int set_effect_sub_thread(effect_sub_thread_func_t func, void *ptr, int num)
+{
+       if(compute_thread_ready < 4 || !func)
+               return 1; // not enough threads, or nothing to register
+       if((func == est_func0 && ptr == est_ptr0) || (func == est_func1 && ptr == est_ptr1))
+               return 0; // this exact pair already owns a slot
+       if(!est_func0){
+               est_ptr0 = ptr;
+               est_func0 = func;
+               return 0;
+       }
+       if(!est_func1){
+               est_ptr1 = ptr;
+               est_func1 = func;
+               return 0;
+       }
+       return 1; // both slots are taken by other pairs
+}
 
+// Unregister an effect sub-thread callback.
+// func/ptr: the pair previously passed to set_effect_sub_thread(); (NULL, NULL) clears
+//           both slots. A pair that matches neither slot is ignored.
+void reset_effect_sub_thread(effect_sub_thread_func_t func, void *ptr)
+{
+       // Handle the clear-everything request first: an empty slot is stored as (NULL, NULL),
+       // so testing slot 0 first would let an empty slot 0 absorb the request and leave
+       // slot 1 registered.
+       if(func == NULL && ptr == NULL){
+               est_func0 = NULL;
+               est_ptr0 = NULL;
+               est_func1 = NULL;
+               est_ptr1 = NULL;
+       }else if(func == est_func0 && ptr == est_ptr0){
+               est_func0 = NULL;
+               est_ptr0 = NULL;
+       }else if(func == est_func1 && ptr == est_ptr1){
+               est_func1 = NULL;
+               est_ptr1 = NULL;
+       }
+}
+
+// Job entry for slot 0: forwards the job index to the registered callback, if any.
+static void effect_sub_thread0(int thread_num)
+{
+       if(!est_func0)
+               return; // slot 0 is empty
+       est_func0(thread_num, est_ptr0);
+}
+
+// Job entry for slot 1: forwards the job index to the registered callback, if any.
+static void effect_sub_thread1(int thread_num)
+{
+       if(!est_func1)
+               return; // slot 1 is empty
+       est_func1(thread_num, est_ptr1);
+}
+
+// Run `num` sub-jobs of a registered effect callback.
+// func/ptr: a pair previously accepted by set_effect_sub_thread(); the slot it occupies
+//           selects the sub-job channel (slot 0 -> go_compute_thread_sub0, slot 1 -> sub1).
+// num: number of jobs to run.
+// A pair that is not registered, or a NULL func, is ignored.
+void go_effect_sub_thread(effect_sub_thread_func_t func, void *ptr, int num)
+{
+       // An empty slot is stored as (NULL, NULL); without this guard a (NULL, NULL) call
+       // would match it and dispatch `num` jobs that do nothing.
+       if(!func)
+               return;
+       if(func == est_func0 && ptr == est_ptr0)
+               go_compute_thread_sub0(effect_sub_thread0, num);
+       else if(func == est_func1 && ptr == est_ptr1)
+               go_compute_thread_sub1(effect_sub_thread1, num);
+}
 
+#endif // MULTI_THREAD_COMPUTE2
 
 /************************************ inialize_effect ***************************************/
 
index 0a45a40..217b89c 100644 (file)
@@ -193,6 +193,36 @@ static inline void mix_mystery_signal_thread(DATA_T *sp, DATA_T *lp, int v, int
                                sp += 4;
                        }
 
+#elif (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE)
+                       __m128d vevol, vspx, vsp1, vsp2;
+                       reset_envelope2(&vp->mix_env, vp->left_mix, vp->right_mix, ENVELOPE_KEEP);
+                       for (i = 0; i < count; i += 4) {
+                               if(!(i & mix_env_mask)){
+                                       compute_envelope2(&vp->mix_env, opt_mix_envelope);
+#if defined(FLOAT_T_DOUBLE)    
+                                       vevol = _mm_loadu_pd(vp->mix_env.vol);
+#else
+                                       vevol = _mm_cvtps_pd(_mm_load_ps(vp->mix_env.vol));
+#endif
+                               }
+                               vspx = _mm_loadu_pd(sp);
+                               vsp1 = _mm_shuffle_pd(vspx, vspx, 0x0);
+                               vsp2 = _mm_shuffle_pd(vspx, vspx, 0x3);
+                               _mm_storeu_pd(lp, _mm_mul_pd(vsp1, vevol));
+                               lp += 2;
+                               _mm_storeu_pd(lp, _mm_mul_pd(vsp2, vevol));
+                               lp += 2;
+                               sp += 2;
+                               vspx = _mm_loadu_pd(sp);
+                               vsp1 = _mm_shuffle_pd(vspx, vspx, 0x0);
+                               vsp2 = _mm_shuffle_pd(vspx, vspx, 0x3);
+                               _mm_storeu_pd(lp, _mm_mul_pd(vsp1, vevol));
+                               lp += 2;
+                               _mm_storeu_pd(lp, _mm_mul_pd(vsp2, vevol));
+                               lp += 2;
+                               sp += 2;
+                       }
+
 #elif (USE_X86_EXT_INTRIN >= 2) && defined(DATA_T_FLOAT)
                        __m128 vevol, vsp, vsp1, vsp2;
                        reset_envelope2(&vp->mix_env, vp->left_mix, vp->right_mix, ENVELOPE_KEEP);
@@ -371,7 +401,10 @@ void mix_voice_thread(DATA_T *buf, int v, int32 c, int thread)
        if (delay_cnt) {
                if(delay_cnt == c)
                        return;
-               else if (play_mode->encoding & PE_MONO)
+#if (USE_X86_EXT_INTRIN >= 3) && defined(DATA_T_DOUBLE) && defined(FLOAT_T_DOUBLE)
+               delay_cnt &= ~(0x1); // for filter SIMD optimaize (filter.c buffer_filter()
+#endif
+               if (play_mode->encoding & PE_MONO)
                        buf += delay_cnt;
                else
                        buf += delay_cnt * 2;
index 4f0dccb..f303d91 100644 (file)
@@ -265,15 +265,16 @@ job_num
  2; // do effect // sd_mx0 // gm2_cho // gs_ins // xg_dly
  3; // do effect // sd_mx1 // gm2_mx0 // gs_cho // xg_cho
  4; // do effect // sd_mx2 // gm2_mx1 // gs_dly // xg_meq
- 5; // do effect // sd_cho // gm2_mx2 // gs_peq // \8bó
- 6; // do effect // sd_meq // gm2_meq // \8bó     // \8bó
- 7; // send effect // master,eq
- 8; // send effect // reverb
- 9; // send effect // chorus
-10; // send effect // gs_dly // xg_var // sd_mfx
+ 5; // do effect // sd_cho // gm2_mx2 // gs_peq // \8bó    
+ 6; // do effect // sd_meq // gm2_meq // \8bó     // \8bó    
+ 7; // send effect // master,eq                          
+ 8; // send effect // reverb                             
+ 9; // send effect // chorus                                                    
+10; // send effect // gs_dly // xg_var // sd_mfx         
 11; // send effect // gs_ins // XG \8bó // SD \8bó
 12; // compute_var
 4-20:\83{\83C\83X/\83`\83\83\83\93\83l\83\8b\82Í16\95ª\8a\84
+\82±\82ê\82Æ\82Í\95Ê\82É\83G\83t\83F\83N\83g\97p\83X\83\8c\83b\83h\82Í1;2;\88È\8d~\82Ì\8bó\82¢\82½\82Æ\82±\82ë\82É\92Ç\89Á\82³\82ê\82é (RevExMod\8eg\97p\82Ì\8fê\8d\87
 
 effect\82Í(\88ê\94Ô\95ª\8a\84\90\94\82ª\91½\82¢\82à\82Ì\82É\8d\87\82í\82¹\82Ä\82é\82Ì\82ÅMIDI\83V\83X\83e\83\80\82É\82æ\82Á\82Ä\8bó\82ª\82 \82é
 \93¯\82¶job_num\82Í1\83Z\83b\83g\82Ì\83W\83\87\83u (effect\82Í\90\94\82ª\8f­\82È\82¢\82Ì\82Æ\8bó\82«\82 \82é\82Ì\82Å\83{\83C\83X/\83`\83\83\83\93\83l\83\8b\82¾\82¯\82Ì\82à\82 \82é
@@ -283,6 +284,8 @@ effect
 (XG_INS*4(\95ÊCH)\82Ì\8fê\8d\87\82Í16\82ª\8dÅ\93K
 \95\89\89×\82Ì\8d\82\82¢\89Â\94\\90«\82Ì\82 \82é 3\88È\89º\82Í \83{\83C\83X/\83`\83\83\83\93\83l\83\8b\82Æ\95ª\97£
 \82È\82Ç\82¢\82ë\82¢\82ë\82 \82Á\82Ä 20\95ª\8a\84      
+
+
 */
 #define CDM_JOB_VC_OFFSET 4
 const int cdm_job_num = CDM_JOB_NUM; // 13 <= num // for voice/channel
index da829c9..8b6b002 100644 (file)
@@ -8783,8 +8783,8 @@ int main(int argc, char **argv)
        _CrtSetDbgFlag(CRTDEBUGFLAGS);
 #endif
        atexit(w32_exit);
-
-#ifdef ENABLE_VIRTUAL_TERMINAL_PROCESSING
+       
+#ifdef __W32__
        {
                HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE);
 
@@ -8794,7 +8794,7 @@ int main(int argc, char **argv)
 
                        if (GetConsoleMode(hStdOut, &mode))
                        {
-                               SetConsoleMode(hStdOut, mode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
+                               SetConsoleMode(hStdOut, mode | 0x0004 /* ENABLE_VIRTUAL_TERMINAL_PROCESSING */);
                        }
                }
        }
index d5f7282..eced547 100644 (file)
@@ -142,6 +142,14 @@ const IID tim_IID_IAudioClient           = {0x1CB9AD4C, 0xDBFA, 0x4C32, {0xB1, 0
 const IID tim_IID_IAudioRenderClient     = {0xF294ACFC, 0x3146, 0x4483, {0xA7, 0xBF, 0xAD, 0xDC, 0xA7, 0xC2, 0x60, 0xE2}};
 const IID tim_IID_IAudioClient2          = {0x726778CD, 0xF60A, 0x4EDA, {0x82, 0xDE, 0xE4, 0x76, 0x10, 0xCD, 0x78, 0xAA}};
 
+// Some compilers do not have the latest version of AudioClientProperties
+// Local stand-in for that structure, so the code does not depend on the SDK's definition.
+// The enum-typed members are declared as INT, with the intended enum named in a comment.
+typedef struct {
+       UINT32 cbSize;          // structure size reported to the API; open_output() sets 16 or 12 depending on the OS version
+       BOOL bIsOffload;        // open_output() always sets FALSE
+       INT /* AUDIO_STREAM_CATEGORY */ eCategory;     // filled from opt_wasapi_stream_category
+       INT /* AUDCLNT_STREAMOPTIONS */ Options;       // bit flags built from opt_wasapi_stream_option; presumably the member omitted when cbSize is 12 -- confirm
+} timAudioClientProperties;
+
 #define SPEAKER_FRONT_LEFT        0x1
 #define SPEAKER_FRONT_RIGHT       0x2
 #define SPEAKER_FRONT_CENTER      0x4
@@ -932,20 +940,24 @@ int open_output(void)
 
                if (SUCCEEDED(IAudioClient_QueryInterface(pAudioClient, &tim_IID_IAudioClient2, (void**)&pAudioClient2)))
                {
-                       AudioClientProperties acp = {0};
-                       acp.cbSize = min(sizeof(AudioClientProperties), ver >= 4 ? 16 : 12);
+                       timAudioClientProperties acp = {0};
+                       acp.cbSize = (ver >= 4 ? 16 : 12);
                        acp.bIsOffload = FALSE;
                        acp.eCategory  = opt_wasapi_stream_category;
                
-#if (NTDDI_VERSION >= NTDDI_WINBLUE) && !defined(__MINGW32__)
-                       if(opt_wasapi_stream_option >= 2){
-                               if(ver >= 6) // win10\88È\8fã
-                                       acp.Options = AUDCLNT_STREAMOPTIONS_MATCH_FORMAT;
-                       }else if(opt_wasapi_stream_option == 1){
+                       if (opt_wasapi_stream_option & 4) {
+                               if (ver >= 6) // win10\88È\8fã
+                                       acp.Options |= 4 /* AUDCLNT_STREAMOPTIONS_AMBISONICS */;
+                       }
+                       if (opt_wasapi_stream_option & 2) {
+                               if (ver >= 6) // win10\88È\8fã
+                                       acp.Options |= 2 /* AUDCLNT_STREAMOPTIONS_MATCH_FORMAT */;
+                       }
+                       if (opt_wasapi_stream_option & 1){
                                if(ver >= 4) // win8.1\88È\8fã
-                                       acp.Options = AUDCLNT_STREAMOPTIONS_RAW;
+                                       acp.Options |= 1 /* AUDCLNT_STREAMOPTIONS_RAW */;
                        }
-#endif
+
                        hr = IAudioClient2_SetClientProperties(pAudioClient2, &acp);
                        IAudioClient2_Release(pAudioClient2);
                        if (FAILED(hr))
index fa0d4ec..7eba19e 100644 (file)
@@ -90,8 +90,8 @@
     <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(VCInstallDir)atlmfc\lib\amd64;$(VCInstallDir)lib\amd64;$(WindowsSdkDir)lib\x64;$(FrameworkSDKDir)\lib\x64;$(WindowsSdkDir)\lib\x64;C:\Program Files\Microsoft SDKs\Windows\v7.1\Lib\x64;</LibraryPath>
     <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(VCInstallDir)lib;$(VCInstallDir)atlmfc\lib;$(WindowsSdkDir)lib;$(FrameworkSDKDir)\lib;$(DXSDK_DIR)\ILib\x86;</LibraryPath>
     <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(VCInstallDir)atlmfc\lib\amd64;$(VCInstallDir)lib\amd64;$(WindowsSdkDir)lib\x64;$(FrameworkSDKDir)\lib\x64;$(WindowsSdkDir)\lib\x64;C:\Program Files\Microsoft SDKs\Windows\v7.1\Lib\x64;</LibraryPath>
-    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">timw32g_c218</TargetName>
-    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">timw32g_x64_c218</TargetName>
+    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">timw32g_c219</TargetName>
+    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">timw32g_x64_c219</TargetName>
     <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)_x64</TargetName>
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
index 8fad708..17f07c2 100644 (file)
@@ -81,8 +81,8 @@
     <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\</IntDir>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
     <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
-    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">twsyng_c218</TargetName>
-    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">twsyng_x64_c218</TargetName>
+    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">twsyng_c219</TargetName>
+    <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">twsyng_x64_c219</TargetName>
     <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(VCInstallDir)lib\amd64;$(VCInstallDir)atlmfc\lib\amd64;$(WindowsSdkDir)lib\x64;C:\Program Files\Microsoft SDKs\Windows\v7.1\Lib\x64;</LibraryPath>
     <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)_x64</TargetName>
     <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(LibraryPath);$(VSInstallDir);$(VSInstallDir)lib\amd64;$(WindowsSdkDir)lib\x64;</LibraryPath>