From 76fd9d215c25874b1c5d33355de0ed983922c32d Mon Sep 17 00:00:00 2001
From: Saracen
Date: Thu, 24 May 2018 21:35:39 +0100
Subject: [PATCH] Fixes for microphone clipping and latency (marcelofg55)

WASAPI capture:
- The capture buffer is now used as a ring buffer: it is sized to max_frames,
  current_capture_index wraps to 0 when it reaches buffer.size(), and the new
  current_capture_size counts the frames written so far, saturating at
  buffer.size().
- The device format is normalised into capture_format_tag (WAVE_FORMAT_PCM or
  WAVE_FORMAT_IEEE_FLOAT; WAVE_FORMAT_EXTENSIBLE is resolved via SubFormat).
- Captured samples are decoded by the new read_sample(), which handles
  8/16/24/32-bit PCM and IEEE float. The three bytes of a 24-bit PCM sample are
  combined as unsigned values so that a low byte with its top bit set cannot
  sign-extend into, and overwrite, the higher bits of the sample.
- Mono input is duplicated to both channels; any other channel count than 1 or
  2 produces silent frames and an error print.

Microphone playback:
- internal_mic_offset is now a persistent read position into the ring buffer.
  It is reset in start() instead of after every mix(), wraps at
  source_buffer.size(), and _mix_internal() outputs silent frames while
  get_current_buffer_size() is below the read position.

---
 drivers/wasapi/audio_driver_wasapi.cpp | 88 ++++++++++++++++++--------
 drivers/wasapi/audio_driver_wasapi.h   |  2 +
 servers/audio/audio_stream.cpp         | 16 +++--
 servers/audio/audio_stream.h           |  2 +-
 servers/audio_server.h                 |  5 ++
 5 files changed, 78 insertions(+), 35 deletions(-)

diff --git a/drivers/wasapi/audio_driver_wasapi.cpp b/drivers/wasapi/audio_driver_wasapi.cpp
index db09a61066c..8fe83a3be50 100644
--- a/drivers/wasapi/audio_driver_wasapi.cpp
+++ b/drivers/wasapi/audio_driver_wasapi.cpp
@@ -432,30 +432,27 @@ Error AudioDriverWASAPI::init_capture_devices(bool reinit) {
 		microphone_device_output_wasapi->frame_size = (microphone_device_output_wasapi->bits_per_sample / 8) * microphone_device_output_wasapi->channels;
 
 		microphone_device_output_wasapi->current_capture_index = 0;
+		microphone_device_output_wasapi->current_capture_size = 0;
 
-		if (pwfex->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
+		WORD format_tag = pwfex->wFormatTag;
+		if (format_tag == WAVE_FORMAT_EXTENSIBLE) {
 			WAVEFORMATEXTENSIBLE *wfex = (WAVEFORMATEXTENSIBLE *)pwfex;
 
 			if (wfex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM) {
-				microphone_device_output_wasapi->microphone_format = MicrophoneDeviceOutputDirect::FORMAT_PCM;
+				format_tag = WAVE_FORMAT_PCM;
 			} else if (wfex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) {
-				microphone_device_output_wasapi->microphone_format = MicrophoneDeviceOutputDirect::FORMAT_FLOAT;
+				format_tag = WAVE_FORMAT_IEEE_FLOAT;
 			} else {
 				ERR_PRINT("WASAPI: Format not supported");
 				ERR_FAIL_V(ERR_CANT_OPEN);
 			}
 		} else {
-			if (pwfex->wFormatTag != WAVE_FORMAT_PCM && pwfex->wFormatTag != WAVE_FORMAT_IEEE_FLOAT) {
+			if (format_tag != WAVE_FORMAT_PCM && format_tag != WAVE_FORMAT_IEEE_FLOAT) {
 				ERR_PRINT("WASAPI: Format not supported");
 				ERR_FAIL_V(ERR_CANT_OPEN);
-			} else {
-				if (pwfex->wFormatTag == WAVE_FORMAT_PCM) {
-					microphone_device_output_wasapi->microphone_format = MicrophoneDeviceOutputDirect::FORMAT_PCM;
-				} else {
-					microphone_device_output_wasapi->microphone_format = MicrophoneDeviceOutputDirect::FORMAT_FLOAT;
-				}
 			}
 		}
+		microphone_device_output_wasapi->capture_format_tag = format_tag;
 
 		hr = microphone_device_output_wasapi->audio_client->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, REFTIMES_PER_SEC, 0, pwfex, NULL);
 		ERR_FAIL_COND_V(hr != S_OK, ERR_CANT_OPEN);
@@ -466,7 +463,7 @@ Error AudioDriverWASAPI::init_capture_devices(bool reinit) {
 		ERR_FAIL_COND_V(hr != S_OK, ERR_CANT_OPEN);
 
 		// Set the buffer size
-		microphone_device_output_wasapi->buffer.resize(max_frames * 10); // 10 second test buffer (will crash after it's been filled due to lack of looping)
+		microphone_device_output_wasapi->buffer.resize(max_frames);
 		memset(microphone_device_output_wasapi->buffer.ptrw(), 0x00, microphone_device_output_wasapi->buffer.size() * microphone_device_output_wasapi->frame_size);
 
 		// Get the capture client
@@ -611,6 +608,39 @@ void AudioDriverWASAPI::set_device(String device) {
 	unlock();
 }
 
+float AudioDriverWASAPI::read_sample(WORD format_tag, int bits_per_sample, BYTE *buffer, int i) {
+	if (format_tag == WAVE_FORMAT_PCM) {
+		int32_t sample = 0;
+		switch (bits_per_sample) {
+			case 8:
+				sample = int32_t(((int8_t *)buffer)[i]) << 24;
+				break;
+
+			case 16:
+				sample = int32_t(((int16_t *)buffer)[i]) << 16;
+				break;
+
+			case 24:
+				sample |= int32_t(uint32_t(buffer[i * 3 + 2]) << 24);
+				sample |= int32_t(uint32_t(buffer[i * 3 + 1]) << 16);
+				sample |= int32_t(uint32_t(buffer[i * 3 + 0]) << 8);
+				break;
+
+			case 32:
+				sample = ((int32_t *)buffer)[i];
+				break;
+		}
+
+		return (sample >> 16) / 32768.f;
+	} else if (format_tag == WAVE_FORMAT_IEEE_FLOAT) {
+		return ((float *)buffer)[i];
+	} else {
+		ERR_PRINT("WASAPI: Unknown format tag");
+	}
+
+	return 0.f;
+}
+
 void AudioDriverWASAPI::write_sample(AudioDriverWASAPI *ad, BYTE *buffer, int i, int32_t sample) {
 	if (ad->format_tag == WAVE_FORMAT_PCM) {
 		switch (ad->bits_per_sample) {
@@ -688,19 +718,27 @@ void AudioDriverWASAPI::thread_func(void *p_udata) {
 						memset((char *)(microphone_device_output_wasapi->buffer.ptrw()) + (microphone_device_output_wasapi->current_capture_index * microphone_device_output_wasapi->frame_size), 0, frames_to_copy * microphone_device_output_wasapi->frame_size);
 					} else {
 						// fixme: Only works for floating point atm
-						if (microphone_device_output_wasapi->channels == 2) {
-							for (int j = 0; j < frames_to_copy; j++) {
-								float left = *(((float *)data) + (j * 2));
-								float right = *(((float *)data) + (j * 2) + 1);
-								microphone_device_output_wasapi->buffer[microphone_device_output_wasapi->current_capture_index + j] = AudioFrame(left, right);
+						for (int j = 0; j < frames_to_copy; j++) {
+							float l, r;
+
+							if (microphone_device_output_wasapi->channels == 2) {
+								l = read_sample(microphone_device_output_wasapi->capture_format_tag, microphone_device_output_wasapi->bits_per_sample, data, j * 2);
+								r = read_sample(microphone_device_output_wasapi->capture_format_tag, microphone_device_output_wasapi->bits_per_sample, data, j * 2 + 1);
+							} else if (microphone_device_output_wasapi->channels == 1) {
+								l = r = read_sample(microphone_device_output_wasapi->capture_format_tag, microphone_device_output_wasapi->bits_per_sample, data, j);
+							} else {
+								l = r = 0.f;
+								ERR_PRINT("WASAPI: unsupported channel count in microphone!");
 							}
-						} else if (microphone_device_output_wasapi->channels == 1) {
-							for (int j = 0; j < frames_to_copy; j++) {
-								float value = *(((float *)data) + j);
-								microphone_device_output_wasapi->buffer[microphone_device_output_wasapi->current_capture_index + j] = AudioFrame(value, value);
+
+							microphone_device_output_wasapi->buffer[microphone_device_output_wasapi->current_capture_index++] = AudioFrame(l, r);
+
+							if (microphone_device_output_wasapi->current_capture_index >= microphone_device_output_wasapi->buffer.size()) {
+								microphone_device_output_wasapi->current_capture_index = 0;
+							}
+							if (microphone_device_output_wasapi->current_capture_size < microphone_device_output_wasapi->buffer.size()) {
+								microphone_device_output_wasapi->current_capture_size++;
 							}
-						} else {
-							ERR_PRINT("WASAPI: unsupported channel count in microphone!");
 						}
 					}
 
@@ -709,12 +747,6 @@ void AudioDriverWASAPI::thread_func(void *p_udata) {
 
 					hr = microphone_device_output_wasapi->capture_client->GetNextPacketSize(&packet_length);
 					ERR_BREAK(hr != S_OK);
-
-					microphone_device_output_wasapi->current_capture_index += frames_to_copy;
-
-					// Test: ensuring the read index is always behind the capture index keeps the input and output reliably in sync, but it
-					// also results in clipping, stutter and other audio artefacts
-					microphone_device_output_wasapi->set_read_index(microphone_device_output_wasapi->current_capture_index - 8192);
 				}
 			}
 
diff --git a/drivers/wasapi/audio_driver_wasapi.h b/drivers/wasapi/audio_driver_wasapi.h
index 084d0c2e3fd..e722d85353a 100644
--- a/drivers/wasapi/audio_driver_wasapi.h
+++ b/drivers/wasapi/audio_driver_wasapi.h
@@ -52,6 +52,7 @@ class AudioDriverWASAPI : public AudioDriver {
 	public:
 		IAudioClient *audio_client;
 		IAudioCaptureClient *capture_client;
+		WORD capture_format_tag;
 	};
 
 	// Mutex *mutex;
@@ -79,6 +80,7 @@ class AudioDriverWASAPI : public AudioDriver {
 	bool active;
 
 	_FORCE_INLINE_ void write_sample(AudioDriverWASAPI *ad, BYTE *buffer, int i, int32_t sample);
+	static _FORCE_INLINE_ float read_sample(WORD format_tag, int bits_per_sample, BYTE *buffer, int i);
 	static void thread_func(void *p_udata);
 
 	StringName get_default_capture_device_name(IMMDeviceEnumerator *p_enumerator);
diff --git a/servers/audio/audio_stream.cpp b/servers/audio/audio_stream.cpp
index 8efcb5bf073..206f1861a35 100644
--- a/servers/audio/audio_stream.cpp
+++ b/servers/audio/audio_stream.cpp
@@ -155,19 +155,22 @@ void AudioStreamPlaybackMicrophone::_mix_internal(AudioFrame *p_buffer, int p_fr
 
 	AudioDriver::MicrophoneDeviceOutput *microphone_device_output = reciever->owner;
 	const Vector<AudioFrame> &source_buffer = microphone_device_output->get_buffer();
+	int current_buffer_size = microphone_device_output->get_current_buffer_size();
 
-	if (microphone_device_output->get_read_index() >= 0) {
-		for (int i = 0; i < p_frames; i++) {
-			p_buffer[i] = source_buffer[internal_mic_offset + microphone_device_output->get_read_index() + i];
+	for (int i = 0; i < p_frames; i++) {
+		if (current_buffer_size >= internal_mic_offset) {
+			if (internal_mic_offset >= source_buffer.size()) {
+				internal_mic_offset = 0;
+			}
+			p_buffer[i] = source_buffer[internal_mic_offset++];
+		} else {
+			p_buffer[i] = AudioFrame(0.f, 0.f);
 		}
 	}
-
-	internal_mic_offset += p_frames;
 }
 
 void AudioStreamPlaybackMicrophone::mix(AudioFrame *p_buffer, float p_rate_scale, int p_frames) {
 	AudioStreamPlaybackResampled::mix(p_buffer, p_rate_scale, p_frames);
-	internal_mic_offset = 0; // Reset
 }
 
 float AudioStreamPlaybackMicrophone::get_stream_sampling_rate() {
@@ -175,6 +178,7 @@ float AudioStreamPlaybackMicrophone::get_stream_sampling_rate() {
 }
 
 void AudioStreamPlaybackMicrophone::start(float p_from_pos) {
+	internal_mic_offset = 0;
 	active = true;
 
 	// note: can this be called twice?
diff --git a/servers/audio/audio_stream.h b/servers/audio/audio_stream.h
index cb3b999cebb..352cb8c5078 100644
--- a/servers/audio/audio_stream.h
+++ b/servers/audio/audio_stream.h
@@ -127,7 +127,7 @@ class AudioStreamPlaybackMicrophone : public AudioStreamPlaybackResampled {
 	friend class AudioStreamMicrophone;
 
 	bool active;
-	uint64_t internal_mic_offset;
+	uint32_t internal_mic_offset;
 
 	Ref<AudioStreamMicrophone> microphone;
 	AudioDriver::MicrophoneReciever *reciever;
diff --git a/servers/audio_server.h b/servers/audio_server.h
index f2c3aa0a6f9..68a56c38f37 100644
--- a/servers/audio_server.h
+++ b/servers/audio_server.h
@@ -90,6 +90,7 @@ public:
 
 		virtual unsigned int get_mix_rate() = 0;
 		virtual Vector<AudioFrame> &get_buffer() = 0;
+		virtual int get_current_buffer_size() = 0;
 		virtual int get_read_index() = 0;
 		virtual void set_read_index(int p_temp_index) = 0;
 
@@ -138,6 +139,7 @@ public:
 
 		int read_index = -2048;
 		unsigned int current_capture_index;
+		unsigned int current_capture_size;
 		Vector<AudioFrame> buffer;
 
 		unsigned int get_mix_rate() {
@@ -148,6 +150,8 @@ public:
 			return buffer;
 		};
 
+		int get_current_buffer_size() { return current_capture_size; }
+
 		int get_read_index() {
 			return read_index;
 		}
@@ -174,6 +178,7 @@ public:
 		void set_read_index(int p_read_index) {
 			owner->set_read_index(p_read_index);
 		}
+		int get_current_buffer_size() { return owner->get_current_buffer_size(); }
 	};
 
 	MicrophoneDeviceOutputIndirect *default_microphone_device_output;