From: Anton Baskanov <baskanov@gmail.com> --- dlls/dsound/Makefile.in | 8 +++++ dlls/dsound/dsound_main.c | 8 +++++ dlls/dsound/dsound_private.h | 10 ++++++ dlls/dsound/fir.h | 10 ++++-- dlls/dsound/mixer.c | 10 +++++- dlls/dsound/mixer_sse.c | 65 ++++++++++++++++++++++++++++++++++++ 6 files changed, 108 insertions(+), 3 deletions(-) create mode 100644 dlls/dsound/mixer_sse.c diff --git a/dlls/dsound/Makefile.in b/dlls/dsound/Makefile.in index 1dd6dc2330c..a156d77b9a0 100644 --- a/dlls/dsound/Makefile.in +++ b/dlls/dsound/Makefile.in @@ -2,6 +2,8 @@ MODULE = dsound.dll IMPORTLIB = dsound IMPORTS = dxguid uuid winmm ole32 advapi32 user32 +mixer_sse_EXTRADEFS = -msse + VER_FILEDESCRIPTION_STR = "Wine DirectSound" VER_PRODUCTVERSION = 5,3,1,904 VER_OLESELFREGISTER = 1 @@ -18,3 +20,9 @@ SOURCES = \ primary.c \ propset.c \ sound3d.c + +i386_SOURCES = \ + mixer_sse.c + +x86_64_SOURCES = \ + mixer_sse.c diff --git a/dlls/dsound/dsound_main.c b/dlls/dsound/dsound_main.c index 8936b437ba2..c4dab2348e7 100644 --- a/dlls/dsound/dsound_main.c +++ b/dlls/dsound/dsound_main.c @@ -63,6 +63,8 @@ WINE_DEFAULT_DEBUG_CHANNEL(dsound); +BOOL sse_supported; + struct list DSOUND_renderers = LIST_INIT(DSOUND_renderers); CRITICAL_SECTION DSOUND_renderers_lock; static CRITICAL_SECTION_DEBUG DSOUND_renderers_lock_debug = @@ -82,6 +84,11 @@ GUID *DSOUND_capture_guids; /* All default settings, you most likely don't want to touch these, see wiki on UsefulRegistryKeys */ int ds_hel_buflen = 32768 * 2; +static void init_cpu_features(void) +{ + sse_supported = IsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); +} + /* * Get a config key from either the app-specific or the default config */ @@ -787,6 +794,7 @@ BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpvReserved) DisableThreadLibraryCalls(hInstDLL); /* Increase refcount on dsound by 1 */ GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, (LPCWSTR)hInstDLL, &hInstDLL); + init_cpu_features(); break; case 
DLL_PROCESS_DETACH: if (lpvReserved) break; diff --git a/dlls/dsound/dsound_private.h b/dlls/dsound/dsound_private.h index 0e695698046..0ded79055e4 100644 --- a/dlls/dsound/dsound_private.h +++ b/dlls/dsound/dsound_private.h @@ -33,6 +33,7 @@ #include "wine/list.h" #define DS_MAX_CHANNELS 8 +#define FREQ_ADJUST_SHIFT 32 extern int ds_hel_buflen; @@ -251,6 +252,8 @@ HRESULT IDirectSoundCaptureImpl_Create(IUnknown *outer_unk, REFIID riid, void ** #define STATE_CAPTURING 2 #define STATE_STOPPING 3 +extern BOOL sse_supported; + extern CRITICAL_SECTION DSOUND_renderers_lock; extern struct list DSOUND_renderers; @@ -263,3 +266,10 @@ HRESULT get_mmdevice(EDataFlow flow, const GUID *tgt, IMMDevice **device); HRESULT enumerate_mmdevices(EDataFlow flow, GUID *guids, LPDSENUMCALLBACKW cb, void *user); + +/* mixer_sse.c */ + +#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__)) +void upsample_sse(LONG64 ipos_num, DWORD ipos_num_step, float rem_inv_float, + float rem_inv_step_float, UINT count, float *input, float *output); +#endif diff --git a/dlls/dsound/fir.h b/dlls/dsound/fir.h index 76ac521e0f3..68fa4ecf484 100644 --- a/dlls/dsound/fir.h +++ b/dlls/dsound/fir.h @@ -90,7 +90,9 @@ int main() printf("#define FIR_WIDTH %d\n", fir_width); printf("#define FIR_STEP_SHIFT %d\n", fir_step_shift); printf("#define FIR_STEP %d\n", fir_step); - printf("static const float fir[] = {"); + printf("extern const float DECLSPEC_ALIGN(16) fir[];\n"); + printf("#ifdef FIR_IMPLEMENTATION\n"); + printf("const float DECLSPEC_ALIGN(16) fir[] = {"); // Print the FIR array with an additional row at the end. This simplifies // calculation of the interpolated value by allowing the index to overflow // into the extra row. 
It just repeats the first row, starting from its @@ -112,13 +114,16 @@ int main() printf("\n"); } printf("};\n"); + printf("#endif\n"); } */ #define FIR_WIDTH_SHIFT 6 #define FIR_WIDTH 64 #define FIR_STEP_SHIFT 7 #define FIR_STEP 128 -static const float fir[] = { +extern const float DECLSPEC_ALIGN(16) fir[]; +#ifdef FIR_IMPLEMENTATION +const float DECLSPEC_ALIGN(16) fir[] = { 0.0000000000e+00, -2.4830013102e-06, 1.9318705150e-06, 2.6614854151e-06, -1.5313785194e-05, 4.2076214553e-05, -9.1417167945e-05, 1.7455895136e-04, -3.0567859821e-04, 5.0191365396e-04, -7.8311909082e-04, 1.1713337628e-03, @@ -2312,3 +2317,4 @@ static const float fir[] = { 1.7455895136e-04, -9.1417167945e-05, 4.2076214553e-05, -1.5313785194e-05, 2.6614854151e-06, 1.9318705150e-06, -2.4830013102e-06, 0.0000000000e+00, }; +#endif diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c index 1b4b1c7bd7a..7a1eddaf057 100644 --- a/dlls/dsound/mixer.c +++ b/dlls/dsound/mixer.c @@ -38,11 +38,12 @@ #include "ks.h" #include "ksmedia.h" #include "dsound_private.h" + +#define FIR_IMPLEMENTATION #include "fir.h" WINE_DEFAULT_DEBUG_CHANNEL(dsound); -#define FREQ_ADJUST_SHIFT 32 #define FIXED_0_32_TO_FLOAT(x) ((int)((x) >> 1) * (1.0f / (1ll << 31))) void DSOUND_RecalcVolPan(PDSVOLUMEPAN volpan) @@ -368,6 +369,13 @@ static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, fl float rem_inv_step = FIXED_0_32_TO_FLOAT(ipos_num_step << FIR_STEP_SHIFT); UINT i; +#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__)) + if (sse_supported) { + upsample_sse(ipos_num, ipos_num_step, rem_inv, rem_inv_step, count, input, output); + return; + } +#endif + for(i = 0; i < count; ++i) { UINT ipos = ipos_num >> FREQ_ADJUST_SHIFT; UINT idx = ~(DWORD)ipos_num >> (FREQ_ADJUST_SHIFT - FIR_STEP_SHIFT) << FIR_WIDTH_SHIFT; diff --git a/dlls/dsound/mixer_sse.c b/dlls/dsound/mixer_sse.c new file mode 100644 index 00000000000..62957233556 --- /dev/null +++ b/dlls/dsound/mixer_sse.c @@ -0,0 +1,65 @@ 
+/* SSE versions of DirectSound mixing routines
+ *
+ * Copyright 2026 Anton Baskanov
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include <xmmintrin.h>
+
+#include "windef.h"
+#include "mmsystem.h"
+#include "dsound.h"
+#include "dsound_private.h"
+#include "fir.h"
+/*
+ * upsample_sse
+ *
+ * SSE implementation of the FIR upsampling loop; upsample() in mixer.c calls
+ * it and returns, skipping its own scalar loop, when sse_supported is set.
+ *
+ * Each output sample is the dot product of FIR_WIDTH consecutive input
+ * samples with a filter row that is linearly interpolated between two
+ * consecutive rows of the fir[] table.
+ *
+ * PARAMS
+ *  ipos_num            input position as a fixed-point number; shifting it
+ *                      right by FREQ_ADJUST_SHIFT gives the index of the first
+ *                      input sample, the low 32 bits select the fir[] row
+ *  ipos_num_step       added to ipos_num after every output sample
+ *  rem_inv_float       initial weight of the selected fir[] row; the row that
+ *                      follows it is weighted with 1 - rem_inv
+ *  rem_inv_step_float  added to the weight after every output sample; 1.0 is
+ *                      subtracted again whenever the weight reaches 1.0
+ *  count               number of samples written to output
+ *  input               source samples; input[ipos] .. input[ipos + FIR_WIDTH - 1]
+ *                      are read for every output sample
+ *  output              receives count samples
+ *
+ * NOTES
+ *  fir[] is read with _mm_load_ps, which needs 16-byte aligned addresses.
+ *  fir.h declares the table with DECLSPEC_ALIGN(16), idx is a multiple of
+ *  FIR_WIDTH and j a multiple of 4, so every load starts on a 4-float
+ *  boundary. For the last row idx + FIR_WIDTH points at the additional row
+ *  that the generator in fir.h appends to the table. The input samples are
+ *  read with _mm_loadu_ps because ipos can be any index.
+ */
+void upsample_sse(LONG64 ipos_num, DWORD ipos_num_step, float rem_inv_float,
+        float rem_inv_step_float, UINT count, float *input, float *output)
+{
+    __m128 rem_inv = _mm_set1_ps(rem_inv_float);
+    __m128 rem_inv_step = _mm_set1_ps(rem_inv_step_float);
+    __m128 one = _mm_set1_ps(1.0f);
+
+    UINT i;
+    /* One output sample per iteration. ipos is the index of the first input
+     * sample, idx the offset into fir[] of the row picked by the top
+     * FIR_STEP_SHIFT bits of the bitwise-inverted low 32 bits of ipos_num,
+     * and rem the weight of the row that follows it. */
+    for(i = 0; i < count; ++i) {
+        UINT ipos = ipos_num >> FREQ_ADJUST_SHIFT;
+        UINT idx = ~(DWORD)ipos_num >> (FREQ_ADJUST_SHIFT - FIR_STEP_SHIFT) << FIR_WIDTH_SHIFT;
+        __m128 rem = _mm_sub_ps(one, rem_inv);
+
+        int j;
+        __m128 sum = _mm_set1_ps(0.0f);
+        float* cache = &input[ipos];
+        /* Four taps per step: blend the two filter rows, multiply the result
+         * with four input samples and accumulate into the four lanes of sum,
+         * which act as independent partial sums. */
+        for (j = 0; j < FIR_WIDTH; j += 4) {
+            __m128 fir_value0 = _mm_mul_ps(_mm_load_ps(&fir[idx + j]), rem_inv);
+            __m128 fir_value1 = _mm_mul_ps(_mm_load_ps(&fir[idx + j + FIR_WIDTH]), rem);
+            __m128 fir_value = _mm_add_ps(fir_value0, fir_value1);
+            __m128 input_value = _mm_loadu_ps(&cache[j]);
+            sum = _mm_add_ps(sum, _mm_mul_ps(fir_value, input_value));
+        }
+
+        /* Pairwise add of the partial sums: lane 0 becomes s0 + s1 and lane 2
+         * becomes s2 + s3. Lanes 1 and 3 are not used afterwards. */
+        sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 0, 1)));
+        /* Add lane 2 to lane 0 to get the final sum and store it to the
+         * output array. */
+        sum = _mm_add_ss(sum, _mm_movehl_ps(sum, sum));
+        _mm_store_ss(&output[i], sum);
+        /* Advance the interpolation weight. The compare gives an all-ones mask
+         * where the weight has reached 1.0, so the AND yields 1.0 there and
+         * 0.0 otherwise, and the subtraction brings the weight back down. */
+        rem_inv = _mm_add_ps(rem_inv, rem_inv_step);
+        rem_inv = _mm_sub_ps(rem_inv, _mm_and_ps(one, _mm_cmple_ps(one, rem_inv)));
+
+        ipos_num += ipos_num_step; /* move on to the next input position */
+    }
+}
-- 
GitLab

https://gitlab.winehq.org/wine/wine/-/merge_requests/10716