[AUDIO_WORKLET] Added API for getting the buffer's quantum size (#22681)

cwoffenden · web-flow · commit ac381b83585c · 2024-10-15T13:11:10.000+03:00
The Web Audio API defines the processed sample size as always being
`128`, which is hardcoded in both the test code and docs. The upcoming
Web Audio API has the option to set this to a user defined setting or
request the machine's preference, so in preparation the Audio Worklet is
extended with a function to query the context's _quantum_ at creation
time (and before the worklet is created), and also the processing
callback contains a field with the same value.

For the simplest uses, transitioning to the processing callback's field
value will mean future changes will simply work.

Once the 1.1 version of the Web Audio API is supported, the context
creation can be amended to accept a quantum hint, and any code written
against this PR's changes will still work.
diff --git a/site/source/docs/api_reference/wasm_audio_worklets.rst b/site/source/docs/api_reference/wasm_audio_worklets.rst
@@ -45,7 +45,9 @@ processing graph as AudioWorkletNodes.
 
 Once a class type is instantiated on the Web Audio graph and the graph is
 running, a C/C++ function pointer callback will be invoked for each 128
-samples of the processed audio stream that flows through the node.
+samples of the processed audio stream that flows through the node. Newer Web
+Audio API specs allow this to be changed, so for future compatibility use the
+``AudioSampleFrame``'s ``quantumSize`` to get the value.
 
 This callback will be executed on a dedicated separate audio processing
 thread with real-time processing priority. Each Web Audio context will
@@ -157,7 +159,7 @@ which resumes the audio context when the user clicks on the DOM Canvas element t
                         void *userData)
   {
     for(int i = 0; i < numOutputs; ++i)
-      for(int j = 0; j < 128*outputs[i].numberOfChannels; ++j)
+      for(int j = 0; j < outputs[i].quantumSize*outputs[i].numberOfChannels; ++j)
         outputs[i].data[j] = emscripten_random() * 0.2 - 0.1; // Warning: scale down audio volume by factor of 0.2, raw noise can be really loud otherwise
 
     return true; // Keep the graph output going
diff --git a/src/audio_worklet.js b/src/audio_worklet.js
@@ -31,6 +31,12 @@ function createWasmAudioWorkletProcessor(audioParams) {
       let opts = args.processorOptions;
       this.callbackFunction = Module['wasmTable'].get(opts['cb']);
       this.userData = opts['ud'];
+      // Plus the number of samples to process, fixed for the lifetime of the
+      // context that created this processor. Note for when moving to Web Audio
+      // 1.1: the typed array passed to process() should be the same size as the
+      // quantum size, and this exercise of passing in the value shouldn't
+      // be required (to be verified).
+      this.quantumSize = opts['qs'];
     }
 
     static get parameterDescriptors() {
@@ -45,53 +51,59 @@ function createWasmAudioWorkletProcessor(audioParams) {
       let numInputs = inputList.length,
         numOutputs = outputList.length,
         numParams = 0, i, j, k, dataPtr,
-        stackMemoryNeeded = (numInputs + numOutputs) * 8,
+        quantumBytes = this.quantumSize * 4,
+        stackMemoryNeeded = (numInputs + numOutputs) * {{{ C_STRUCTS.AudioSampleFrame.__size__ }}},
         oldStackPtr = stackSave(),
         inputsPtr, outputsPtr, outputDataPtr, paramsPtr,
         didProduceAudio, paramArray;
 
       // Calculate how much stack space is needed.
-      for (i of inputList) stackMemoryNeeded += i.length * 512;
-      for (i of outputList) stackMemoryNeeded += i.length * 512;
-      for (i in parameters) stackMemoryNeeded += parameters[i].byteLength + 8, ++numParams;
+      for (i of inputList) stackMemoryNeeded += i.length * quantumBytes;
+      for (i of outputList) stackMemoryNeeded += i.length * quantumBytes;
+      for (i in parameters) stackMemoryNeeded += parameters[i].byteLength + {{{ C_STRUCTS.AudioParamFrame.__size__ }}}, ++numParams;
 
       // Allocate the necessary stack space.
       inputsPtr = stackAlloc(stackMemoryNeeded);
 
       // Copy input audio descriptor structs and data to Wasm
       k = inputsPtr >> 2;
-      dataPtr = inputsPtr + numInputs * 8;
+      dataPtr = inputsPtr + numInputs * {{{ C_STRUCTS.AudioSampleFrame.__size__ }}};
       for (i of inputList) {
         // Write the AudioSampleFrame struct instance
-        HEAPU32[k++] = i.length;
-        HEAPU32[k++] = dataPtr;
+        HEAPU32[k + {{{ C_STRUCTS.AudioSampleFrame.numberOfChannels / 4 }}}] = i.length;
+        HEAPU32[k + {{{ C_STRUCTS.AudioSampleFrame.quantumSize / 4 }}}] = this.quantumSize;
+        HEAPU32[k + {{{ C_STRUCTS.AudioSampleFrame.data / 4 }}}] = dataPtr;
+        k += {{{ C_STRUCTS.AudioSampleFrame.__size__ / 4 }}};
         // Marshal the input audio sample data for each audio channel of this input
         for (j of i) {
           HEAPF32.set(j, dataPtr>>2);
-          dataPtr += 512;
+          dataPtr += quantumBytes;
         }
       }
 
       // Copy output audio descriptor structs to Wasm
       outputsPtr = dataPtr;
       k = outputsPtr >> 2;
-      outputDataPtr = (dataPtr += numOutputs * 8) >> 2;
+      outputDataPtr = (dataPtr += numOutputs * {{{ C_STRUCTS.AudioSampleFrame.__size__ }}}) >> 2;
       for (i of outputList) {
         // Write the AudioSampleFrame struct instance
-        HEAPU32[k++] = i.length;
-        HEAPU32[k++] = dataPtr;
+        HEAPU32[k + {{{ C_STRUCTS.AudioSampleFrame.numberOfChannels / 4 }}}] = i.length;
+        HEAPU32[k + {{{ C_STRUCTS.AudioSampleFrame.quantumSize / 4 }}}] = this.quantumSize;
+        HEAPU32[k + {{{ C_STRUCTS.AudioSampleFrame.data / 4 }}}] = dataPtr;
+        k += {{{ C_STRUCTS.AudioSampleFrame.__size__ / 4 }}};
         // Reserve space for the output data
-        dataPtr += 512 * i.length;
+        dataPtr += quantumBytes * i.length;
       }
 
       // Copy parameters descriptor structs and data to Wasm
       paramsPtr = dataPtr;
       k = paramsPtr >> 2;
-      dataPtr += numParams * 8;
+      dataPtr += numParams * {{{ C_STRUCTS.AudioParamFrame.__size__ }}};
       for (i = 0; paramArray = parameters[i++];) {
         // Write the AudioParamFrame struct instance
-        HEAPU32[k++] = paramArray.length;
-        HEAPU32[k++] = dataPtr;
+        HEAPU32[k + {{{ C_STRUCTS.AudioParamFrame.length / 4 }}}] = paramArray.length;
+        HEAPU32[k + {{{ C_STRUCTS.AudioParamFrame.data / 4 }}}] = dataPtr;
+        k += {{{ C_STRUCTS.AudioParamFrame.__size__ / 4 }}};
         // Marshal the audio parameters array
         HEAPF32.set(paramArray, dataPtr>>2);
         dataPtr += paramArray.length*4;
@@ -105,7 +117,7 @@ function createWasmAudioWorkletProcessor(audioParams) {
         // not have one, so manually copy all bytes in)
         for (i of outputList) {
           for (j of i) {
-            for (k = 0; k < 128; ++k) {
+            for (k = 0; k < this.quantumSize; ++k) {
               j[k] = HEAPF32[outputDataPtr++];
             }
           }
diff --git a/src/library_sigs.js b/src/library_sigs.js
@@ -586,6 +586,7 @@ sigs = {
   emscripten_atomic_cancel_wait_async__sig: 'ii',
   emscripten_atomic_wait_async__sig: 'ipippd',
   emscripten_atomics_is_lock_free__sig: 'ii',
+  emscripten_audio_context_quantum_size__sig: 'ii',
   emscripten_audio_context_state__sig: 'ii',
   emscripten_audio_node_connect__sig: 'viiii',
   emscripten_audio_worklet_post_function_sig__sig: 'vippp',
diff --git a/src/library_webaudio.js b/src/library_webaudio.js
@@ -37,11 +37,21 @@ let LibraryWebAudio = {
   // Wasm handle ID.
   $emscriptenGetAudioObject: (objectHandle) => EmAudio[objectHandle],
 
-  // emscripten_create_audio_context() does not itself use
+  // Performs the work of getting the AudioContext's quantum size.
+  $emscriptenGetContextQuantumSize: (contextHandle) => {
+    // TODO: in a future release this will be something like:
+    //   return EmAudio[contextHandle].renderQuantumSize || 128;
+    // It comes with two caveats: the hint used when generating the context needs adding
+    // to emscripten_create_audio_context(), and altering the quantum requires a secure
+    // context and a fallback implementation. Until then we simply use the 1.0 API value:
+    return 128;
+  },
+
+  // emscripten_create_audio_context() does not itself use the
   // emscriptenGetAudioObject() function, but mark it as a dependency, because
   // the user will not be able to utilize the node unless they call
   // emscriptenGetAudioObject() on it on JS side to connect it to the graph, so
-  // this avoids the user needing to manually do it on the command line.
+  // this avoids the user needing to manually add the dependency on the command line.
   emscripten_create_audio_context__deps: ['$emscriptenRegisterAudioObject', '$emscriptenGetAudioObject'],
   emscripten_create_audio_context: (options) => {
     let ctx = window.AudioContext || window.webkitAudioContext;
@@ -264,6 +274,7 @@ let LibraryWebAudio = {
     });
   },
 
+  emscripten_create_wasm_audio_worklet_node__deps: ['$emscriptenGetContextQuantumSize'],
   emscripten_create_wasm_audio_worklet_node: (contextHandle, name, options, callback, userData) => {
 #if ASSERTIONS
     assert(contextHandle, `Called emscripten_create_wasm_audio_worklet_node() with a null Web Audio Context handle!`);
@@ -282,7 +293,11 @@ let LibraryWebAudio = {
       numberOfInputs: HEAP32[options],
       numberOfOutputs: HEAP32[options+1],
       outputChannelCount: HEAPU32[options+2] ? readChannelCountArray(HEAPU32[options+2]>>2, HEAP32[options+1]) : void 0,
-      processorOptions: { 'cb': callback, 'ud': userData }
+      processorOptions: {
+        'cb': callback,
+        'ud': userData,
+        'qs': emscriptenGetContextQuantumSize(contextHandle)
+      }
     } : void 0;
 
 #if WEBAUDIO_DEBUG
@@ -293,6 +308,15 @@ let LibraryWebAudio = {
   },
 #endif // ~AUDIO_WORKLET
 
+  emscripten_audio_context_quantum_size__deps: ['$emscriptenGetContextQuantumSize'],
+  emscripten_audio_context_quantum_size: (contextHandle) => {
+#if ASSERTIONS
+    assert(EmAudio[contextHandle], `Called emscripten_audio_context_quantum_size() with an invalid Web Audio Context handle ${contextHandle}`);
+    assert(EmAudio[contextHandle] instanceof (window.AudioContext || window.webkitAudioContext), `Called emscripten_audio_context_quantum_size() on handle ${contextHandle} that is not an AudioContext, but of type ${EmAudio[contextHandle]}`);
+#endif
+    return emscriptenGetContextQuantumSize(contextHandle);
+  },
+
   emscripten_audio_node_connect: (source, destination, outputIndex, inputIndex) => {
     var srcNode = EmAudio[source];
     var dstNode = EmAudio[destination];
diff --git a/src/struct_info.json b/src/struct_info.json
@@ -1198,6 +1198,20 @@
             ]
         }
     },
+    {
+        "file": "emscripten/webaudio.h",
+        "structs": {
+            "AudioSampleFrame": [
+              "numberOfChannels",
+              "quantumSize",
+              "data"
+            ],
+            "AudioParamFrame": [
+              "length",
+              "data"
+            ]
+        }
+    },
     {
         "file": "AL/al.h",
         "defines": [
diff --git a/src/struct_info_generated.json b/src/struct_info_generated.json
@@ -470,6 +470,17 @@
         "__WASI_RIGHTS_SOCK_SHUTDOWN": 268435456
     },
     "structs": {
+        "AudioParamFrame": {
+            "__size__": 8,
+            "data": 4,
+            "length": 0
+        },
+        "AudioSampleFrame": {
+            "__size__": 12,
+            "data": 8,
+            "numberOfChannels": 0,
+            "quantumSize": 4
+        },
         "EmscriptenBatteryEvent": {
             "__size__": 32,
             "charging": 24,
diff --git a/src/struct_info_generated_wasm64.json b/src/struct_info_generated_wasm64.json
@@ -470,6 +470,17 @@
         "__WASI_RIGHTS_SOCK_SHUTDOWN": 268435456
     },
     "structs": {
+        "AudioParamFrame": {
+            "__size__": 16,
+            "data": 8,
+            "length": 0
+        },
+        "AudioSampleFrame": {
+            "__size__": 16,
+            "data": 8,
+            "numberOfChannels": 0,
+            "quantumSize": 4
+        },
         "EmscriptenBatteryEvent": {
             "__size__": 32,
             "charging": 24,
diff --git a/system/include/emscripten/webaudio.h b/system/include/emscripten/webaudio.h
@@ -95,19 +95,27 @@ typedef void (*EmscriptenWorkletProcessorCreatedCallback)(EMSCRIPTEN_WEBAUDIO_T
 // userData3: A custom userdata pointer to pass to the callback function. This value will be passed on to the call to the given EmscriptenWorkletProcessorCreatedCallback callback function.
 void emscripten_create_wasm_audio_worklet_processor_async(EMSCRIPTEN_WEBAUDIO_T audioContext, const WebAudioWorkletProcessorCreateOptions *options, EmscriptenWorkletProcessorCreatedCallback callback, void *userData3);
 
+// Returns the number of samples processed per channel in an AudioSampleFrame, fixed at 128 in the Web Audio API 1.0 specification, and valid for the lifetime of the audio context.
+// For this to change from the default 128, the context would need creating with an as-yet unexposed WebAudioWorkletProcessorCreateOptions renderSizeHint, part of the 1.1 Web Audio API.
+int emscripten_audio_context_quantum_size(EMSCRIPTEN_WEBAUDIO_T audioContext);
+
 typedef int EMSCRIPTEN_AUDIO_WORKLET_NODE_T;
 
 typedef struct AudioSampleFrame
 {
+	// Number of audio channels to process (multiplied by quantumSize gives the elements in data)
 	const int numberOfChannels;
-	// An array of length numberOfChannels*128 elements, where data[channelIndex*128+i] locates the data of the i'th sample of channel channelIndex.
+	// Number of samples per channel in data
+	const int quantumSize;
+	// An array of length numberOfChannels*quantumSize elements. Samples are always arranged in a planar fashion,
+	// where data[channelIndex*quantumSize+i] locates the data of the i'th sample of channel channelIndex.
 	float *data;
 } AudioSampleFrame;
 
 typedef struct AudioParamFrame
 {
 	// Specifies the length of the input array data (in float elements). This will be guaranteed to either have
-	// a value of 1 or 128, depending on whether the audio parameter changed during this frame.
+	// a value of 1, for a parameter valid for the entire frame, or emscripten_audio_context_quantum_size() for a parameter that changes during the frame.
 	int length;
 	// An array of length specified in 'length'.
 	float *data;
diff --git a/test/webaudio/audio_worklet_tone_generator.c b/test/webaudio/audio_worklet_tone_generator.c
@@ -1,6 +1,8 @@
 #include <emscripten/webaudio.h>
 #include <emscripten/em_math.h>
 
+#include <stdio.h>
+
 // This program tests that sharing the WebAssembly Memory works between the
 // audio generator thread and the main browser UI thread.  Two sliders,
 // frequency and volume, can be adjusted on the HTML page, and the audio thread
@@ -25,7 +27,7 @@ float currentVolume = 0.3; // [local variable to the audio thread]
 volatile int audioProcessedCount = 0;
 #endif
 
-// This function will be called for every fixed 128 samples of audio to be processed.
+// This function will be called for every fixed-size buffer of audio samples to be processed.
 bool ProcessAudio(int numInputs, const AudioSampleFrame *inputs, int numOutputs, AudioSampleFrame *outputs, int numParams, const AudioParamFrame *params, void *userData) {
 #ifdef REPORT_RESULT
   ++audioProcessedCount;
@@ -38,12 +40,12 @@ bool ProcessAudio(int numInputs, const AudioSampleFrame *inputs, int numOutputs,
 
   // Produce a sine wave tone of desired frequency to all output channels.
   for(int o = 0; o < numOutputs; ++o)
-    for(int i = 0; i < 128; ++i)
+    for(int i = 0; i < outputs[o].quantumSize; ++i)
     {
       float s = emscripten_math_sin(phase);
       phase += phaseIncrement;
       for(int ch = 0; ch < outputs[o].numberOfChannels; ++ch)
-        outputs[o].data[ch*128 + i] = s * currentVolume;
+        outputs[o].data[ch*outputs[o].quantumSize + i] = s * currentVolume;
     }
 
   // Range reduce to keep precision around zero.
@@ -148,6 +150,12 @@ int main() {
 
   EMSCRIPTEN_WEBAUDIO_T context = emscripten_create_audio_context(&attrs);
 
+  // Get the context's quantum size. Once the audio API allows this to be user
+  // defined or exposes the hardware's own value, this will be needed to
+  // determine the worklet stack size.
+  int quantumSize = emscripten_audio_context_quantum_size(context);
+  printf("Context quantum size: %d\n", quantumSize);
+
   // and kick off Audio Worklet scope initialization, which shares the Wasm
   // Module and Memory to the AudioWorklet scope and initializes its stack.
   emscripten_start_wasm_audio_worklet_thread_async(context, wasmAudioWorkletStack, sizeof(wasmAudioWorkletStack), WebAudioWorkletThreadInitialized, 0);
diff --git a/test/webaudio/audioworklet.c b/test/webaudio/audioworklet.c
@@ -29,7 +29,7 @@ _Thread_local int testTlsVariable = 1;
 int lastTlsVariableValueInAudioThread = 1;
 #endif
 
-// This function will be called for every fixed 128 samples of audio to be processed.
+// This function will be called for every fixed-size buffer of audio samples to be processed.
 bool ProcessAudio(int numInputs, const AudioSampleFrame *inputs, int numOutputs, AudioSampleFrame *outputs, int numParams, const AudioParamFrame *params, void *userData) {
 #ifdef REPORT_RESULT
   assert(testTlsVariable == lastTlsVariableValueInAudioThread);
@@ -40,7 +40,7 @@ bool ProcessAudio(int numInputs, const AudioSampleFrame *inputs, int numOutputs,
 
   // Produce noise in all output channels.
   for(int i = 0; i < numOutputs; ++i)
-    for(int j = 0; j < 128*outputs[i].numberOfChannels; ++j)
+    for(int j = 0; j < outputs[i].quantumSize*outputs[i].numberOfChannels; ++j)
       outputs[i].data[j] = (rand() / (float)RAND_MAX * 2.0f - 1.0f) * 0.3f;
 
   // We generated audio and want to keep this processor going. Return false here to shut down.