Jinoh Kang (@iamahuman) commented about dlls/windows.media.speech/recognizer.c:
}
+static DWORD CALLBACK session_worker_thread_cb( void *args )
+{
+    ISpeechContinuousRecognitionSession *iface = args;
+    struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface);
+    UINT32 frame_count, frames_available, tmp_buf_offset = 0;
+    BOOLEAN running = TRUE, paused = FALSE;
+    BYTE *audio_buf, *tmp_buf;
+    DWORD flags, status;
+    HANDLE events[2];
+
+    IAudioClient_Start(impl->audio_client);
+    IAudioClient_GetBufferSize(impl->audio_client, &frame_count);
+
+    tmp_buf = malloc(sizeof(*tmp_buf) * frame_count * 2); /* multiplied with 2 because our audio frames have 16bit depth. */

As per the review comment below, replace `2` with something like `(WINE_VOSK_BITS_PER_SAMPLE + 7) / 8`.
-- https://gitlab.winehq.org/wine/wine/-/merge_requests/1948#note_21054