From: Anton Baskanov <baskanov@gmail.com>

---
 dlls/dsound/fir.h   |  4 ++--
 dlls/dsound/mixer.c | 65 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/dlls/dsound/fir.h b/dlls/dsound/fir.h
index 76ac521e0f3..39a32af1412 100644
--- a/dlls/dsound/fir.h
+++ b/dlls/dsound/fir.h
@@ -90,7 +90,7 @@ int main()
     printf("#define FIR_WIDTH %d\n", fir_width);
     printf("#define FIR_STEP_SHIFT %d\n", fir_step_shift);
     printf("#define FIR_STEP %d\n", fir_step);
-    printf("static const float fir[] = {");
+    printf("static const float DECLSPEC_ALIGN(16) fir[] = {");
     // Print the FIR array with an additional row at the end. This simplifies
     // calculation of the interpolated value by allowing the index to overflow
     // into the extra row. It just repeats the first row, starting from its
@@ -118,7 +118,7 @@ int main()
 #define FIR_WIDTH 64
 #define FIR_STEP_SHIFT 7
 #define FIR_STEP 128
-static const float fir[] = {
+static const float DECLSPEC_ALIGN(16) fir[] = {
 0.0000000000e+00, -2.4830013102e-06, 1.9318705150e-06, 2.6614854151e-06,
 -1.5313785194e-05, 4.2076214553e-05, -9.1417167945e-05, 1.7455895136e-04,
 -3.0567859821e-04, 5.0191365396e-04, -7.8311909082e-04, 1.1713337628e-03,
diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c
index 1b4b1c7bd7a..b8a7208ae32 100644
--- a/dlls/dsound/mixer.c
+++ b/dlls/dsound/mixer.c
@@ -25,6 +25,9 @@
 #include <assert.h>
 #include <stdarg.h>
 #include <math.h> /* Insomnia - pow() function */
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
 
 #define COBJMACROS
 
@@ -339,6 +342,62 @@ static void downsample(DWORD freq_adjust_den, DWORD freq_acc_start, float firgai
     }
 }
 
+#ifdef __SSE__
+
+/*
+ * SSE version of the upsample() inner loop; writes count samples to output.
+ *
+ * ipos_num is the fixed-point input position and rem_inv_float the weight
+ * applied to the fir entries at idx; the entries FIR_WIDTH further on get
+ * 1 - rem_inv. ipos_num_step and rem_inv_step_float are added after every
+ * output sample. Each sample reads FIR_WIDTH floats from input, starting at
+ * ipos_num >> FREQ_ADJUST_SHIFT.
+ */
+static void upsample_sse(LONG64 ipos_num, DWORD ipos_num_step, float rem_inv_float,
+        float rem_inv_step_float, UINT count, float *input, float *output)
+{
+    __m128 rem_inv = _mm_set1_ps(rem_inv_float);
+    __m128 rem_inv_step = _mm_set1_ps(rem_inv_step_float);
+    __m128 one = _mm_set1_ps(1.0f);
+
+    UINT i;
+
+    for(i = 0; i < count; ++i) {
+        UINT ipos = ipos_num >> FREQ_ADJUST_SHIFT;
+        UINT idx = ~(DWORD)ipos_num >> (FREQ_ADJUST_SHIFT - FIR_STEP_SHIFT) << FIR_WIDTH_SHIFT;
+        __m128 rem = _mm_sub_ps(one, rem_inv);
+
+        int j;
+        __m128 sum = _mm_set1_ps(0.0f);
+        float *cache = &input[ipos];
+
+        C_ASSERT(!(FIR_WIDTH % 4));
+        for (j = 0; j < FIR_WIDTH; j += 4) {
+            /* _mm_load_ps needs 16-byte alignment: fir is DECLSPEC_ALIGN(16), j steps
+             * by 4 and idx is presumably a multiple of FIR_WIDTH. input is unaligned. */
+            __m128 fir_value0 = _mm_mul_ps(_mm_load_ps(&fir[idx + j]), rem_inv);
+            __m128 fir_value1 = _mm_mul_ps(_mm_load_ps(&fir[idx + j + FIR_WIDTH]), rem);
+            __m128 fir_value = _mm_add_ps(fir_value0, fir_value1);
+            __m128 input_value = _mm_loadu_ps(&cache[j]);
+            sum = _mm_add_ps(sum, _mm_mul_ps(fir_value, input_value));
+        }
+
+        /* Add the odd-numbered lanes to the even-numbered ones (0 += 1, 2 += 3). */
+        sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 0, 1)));
+        /* Add lane 2 to lane 0 for the final sum and store it to the output array. */
+        sum = _mm_add_ss(sum, _mm_movehl_ps(sum, sum));
+        _mm_store_ss(&output[i], sum);
+
+        /* Advance the weight, subtracting 1.0 in lanes where it has reached 1.0. */
+        rem_inv = _mm_add_ps(rem_inv, rem_inv_step);
+        rem_inv = _mm_sub_ps(rem_inv, _mm_and_ps(one, _mm_cmple_ps(one, rem_inv)));
+
+        ipos_num += ipos_num_step;
+    }
+}
+
+#endif
+
 static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, float *input,
         float *output)
 {
@@ -366,8 +425,11 @@ static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, fl
 
     float rem_inv = FIXED_0_32_TO_FLOAT((DWORD)ipos_num << FIR_STEP_SHIFT);
     float rem_inv_step = FIXED_0_32_TO_FLOAT(ipos_num_step << FIR_STEP_SHIFT);
-    UINT i;
 
+#ifdef __SSE__
+    upsample_sse(ipos_num, ipos_num_step, rem_inv, rem_inv_step, count, input, output);
+#else
+    UINT i;
     for(i = 0; i < count; ++i) {
         UINT ipos = ipos_num >> FREQ_ADJUST_SHIFT;
         UINT idx = ~(DWORD)ipos_num >> (FREQ_ADJUST_SHIFT - FIR_STEP_SHIFT) << FIR_WIDTH_SHIFT;
@@ -386,6 +448,7 @@ static void upsample(DWORD freq_adjust_num, DWORD freq_acc_start, UINT count, fl
 
         ipos_num += ipos_num_step;
     }
+#endif
 }
 
 /**
-- 
GitLab

https://gitlab.winehq.org/wine/wine/-/merge_requests/10716