=18.0.0"
}
},
+ "node_modules/@aws-sdk/credential-providers": {
+ "version": "3.787.0",
+ "resolved": "https://registry.npmjs.org/@aws-sdk/credential-providers/-/credential-providers-3.787.0.tgz",
+ "integrity": "sha512-kR3RtI7drOc9pho13vWbUC2Bvrx9A0G4iizBDGmTs08NOdg4w3c1I4kdLG9tyPiIMeVnH+wYrsli5CM7xIfqiA==",
+ "license": "Apache-2.0",
+ "dependencies": {
+ "@aws-sdk/client-cognito-identity": "3.787.0",
+ "@aws-sdk/core": "3.775.0",
+ "@aws-sdk/credential-provider-cognito-identity": "3.787.0",
+ "@aws-sdk/credential-provider-env": "3.775.0",
+ "@aws-sdk/credential-provider-http": "3.775.0",
+ "@aws-sdk/credential-provider-ini": "3.787.0",
+ "@aws-sdk/credential-provider-node": "3.787.0",
+ "@aws-sdk/credential-provider-process": "3.775.0",
+ "@aws-sdk/credential-provider-sso": "3.787.0",
+ "@aws-sdk/credential-provider-web-identity": "3.787.0",
+ "@aws-sdk/nested-clients": "3.787.0",
+ "@aws-sdk/types": "3.775.0",
+ "@smithy/config-resolver": "^4.1.0",
+ "@smithy/core": "^3.2.0",
+ "@smithy/credential-provider-imds": "^4.0.2",
+ "@smithy/node-config-provider": "^4.0.2",
+ "@smithy/property-provider": "^4.0.2",
+ "@smithy/types": "^4.2.0",
+ "tslib": "^2.6.2"
+ },
+ "engines": {
+ "node": ">=18.0.0"
+ }
+ },
+ "node_modules/@aws-sdk/credential-providers/node_modules/@aws-sdk/types": {
+ "version": "3.775.0",
+ "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.775.0.tgz",
+ "integrity": "sha512-ZoGKwa4C9fC9Av6bdfqcW6Ix5ot05F/S4VxWR2nHuMv7hzfmAjTOcUiWT7UR4hM/U0whf84VhDtXN/DWAk52KA==",
+ "license": "Apache-2.0",
+ "dependencies": {
+ "@smithy/types": "^4.2.0",
+ "tslib": "^2.6.2"
+ },
+ "engines": {
+ "node": ">=18.0.0"
+ }
+ },
"node_modules/@aws-sdk/endpoint-cache": {
"version": "3.723.0",
"resolved": "https://registry.npmjs.org/@aws-sdk/endpoint-cache/-/endpoint-cache-3.723.0.tgz",
@@ -10256,9 +10299,9 @@
}
},
"node_modules/@eslint-community/eslint-utils": {
- "version": "4.6.0",
- "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.6.0.tgz",
- "integrity": "sha512-WhCn7Z7TauhBtmzhvKpoQs0Wwb/kBcy4CwpuI0/eEIr2Lx2auxmulAzLr91wVZJaz47iUZdkXOK7WlAfxGKCnA==",
+ "version": "4.6.1",
+ "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.6.1.tgz",
+ "integrity": "sha512-KTsJMmobmbrFLe3LDh0PC2FXpcSYJt/MLjlkh/9LEnmKYLSYmT/0EW9JWANjeoemiuZrmogti0tW5Ch+qNUYDw==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -16274,9 +16317,9 @@
}
},
"node_modules/aws-cdk": {
- "version": "2.1007.0",
- "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.1007.0.tgz",
- "integrity": "sha512-/UOYOTGWUm+pP9qxg03tID5tL6euC+pb+xo0RBue+xhnUWwj/Bbsw6DbqbpOPMrNzTUxmM723/uMEQmM6S26dw==",
+ "version": "2.1010.0",
+ "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.1010.0.tgz",
+ "integrity": "sha512-kYNzBXVUZoRrTuYxRRA2Loz/Uvay0MqHobg8KPZaWylIbw/meUDgtoATRNt+stOdJ9PHODTjWmlDKI+2/KoF+w==",
"dev": true,
"license": "Apache-2.0",
"bin": {
@@ -16290,9 +16333,9 @@
}
},
"node_modules/aws-cdk-lib": {
- "version": "2.189.1",
- "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.189.1.tgz",
- "integrity": "sha512-9JU0yUr2iRTJ1oCPrHyx7hOtBDWyUfyOcdb6arlumJnMcQr2cyAMASY8HuAXHc8Y10ipVp8dRTW+J4/132IIYA==",
+ "version": "2.190.0",
+ "resolved": "https://registry.npmjs.org/aws-cdk-lib/-/aws-cdk-lib-2.190.0.tgz",
+ "integrity": "sha512-D6BGf0Gg4s3XCnNiXnCgH1NHXYjngizs676HeytI4ekrUMtsw1ZmH9dlFBattH1x9gYX/9A+UxMkid+P4bNZKA==",
"bundleDependencies": [
"@balena/dockerignore",
"case",
@@ -17375,9 +17418,9 @@
}
},
"node_modules/caniuse-lite": {
- "version": "1.0.30001713",
- "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001713.tgz",
- "integrity": "sha512-wCIWIg+A4Xr7NfhTuHdX+/FKh3+Op3LBbSp2N5Pfx6T/LhdQy3GTyoTg48BReaW/MyMNZAkTadsBtai3ldWK0Q==",
+ "version": "1.0.30001714",
+ "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001714.tgz",
+ "integrity": "sha512-mtgapdwDLSSBnCI3JokHM7oEQBLxiJKVRtg10AxM1AyeiKcM96f0Mkbqeq+1AbiCtvMcHRulAAEMu693JrSWqg==",
"funding": [
{
"type": "opencollective",
@@ -19018,9 +19061,9 @@
}
},
"node_modules/deploy-time-build": {
- "version": "0.3.28",
- "resolved": "https://registry.npmjs.org/deploy-time-build/-/deploy-time-build-0.3.28.tgz",
- "integrity": "sha512-sfq2vHZL/35L7+mowo7E9W/jC1U/6DfNx7KQHwN4csTW6QbfwFtwVsTfQxvw2AooV3lVSv45IxoomznfvFdg9w==",
+ "version": "0.3.30",
+ "resolved": "https://registry.npmjs.org/deploy-time-build/-/deploy-time-build-0.3.30.tgz",
+ "integrity": "sha512-eG12huefiY6XFdN1URktbbtBBrIZoXdEEPMy0oUX5LN2k6ExoZEfOgvjW0N54FBrn9Rgfxmdh3kC7nEmUNn6iQ==",
"license": "MIT",
"peerDependencies": {
"aws-cdk-lib": "^2.38.0",
@@ -30529,9 +30572,9 @@
"license": "MIT"
},
"node_modules/sanitize-html": {
- "version": "2.15.0",
- "resolved": "https://registry.npmjs.org/sanitize-html/-/sanitize-html-2.15.0.tgz",
- "integrity": "sha512-wIjst57vJGpLyBP8ioUbg6ThwJie5SuSIjHxJg53v5Fg+kUK+AXlb7bK3RNXpp315MvwM+0OBGCV6h5pPHsVhA==",
+ "version": "2.16.0",
+ "resolved": "https://registry.npmjs.org/sanitize-html/-/sanitize-html-2.16.0.tgz",
+ "integrity": "sha512-0s4caLuHHaZFVxFTG74oW91+j6vW7gKbGD6CD2+miP73CE6z6YtOBN0ArtLd2UGyi4IC7K47v3ENUbQX4jV3Mg==",
"license": "MIT",
"dependencies": {
"deepmerge": "^4.2.2",
@@ -33108,14 +33151,17 @@
}
},
"node_modules/vite": {
- "version": "6.2.6",
- "resolved": "https://registry.npmjs.org/vite/-/vite-6.2.6.tgz",
- "integrity": "sha512-9xpjNl3kR4rVDZgPNdTL0/c6ao4km69a/2ihNQbcANz8RuCOK3hQBmLSJf3bRKVQjVMda+YvizNE8AwvogcPbw==",
+ "version": "6.3.1",
+ "resolved": "https://registry.npmjs.org/vite/-/vite-6.3.1.tgz",
+ "integrity": "sha512-kkzzkqtMESYklo96HKKPE5KKLkC1amlsqt+RjFMlX2AvbRB/0wghap19NdBxxwGZ+h/C6DLCrcEphPIItlGrRQ==",
"license": "MIT",
"dependencies": {
"esbuild": "^0.25.0",
+ "fdir": "^6.4.3",
+ "picomatch": "^4.0.2",
"postcss": "^8.5.3",
- "rollup": "^4.30.1"
+ "rollup": "^4.34.9",
+ "tinyglobby": "^0.2.12"
},
"bin": {
"vite": "bin/vite.js"
@@ -33299,6 +33345,20 @@
"url": "https://github.com/sponsors/jonschlinkert"
}
},
+ "node_modules/vite/node_modules/fdir": {
+ "version": "6.4.3",
+ "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.3.tgz",
+ "integrity": "sha512-PMXmW2y1hDDfTSRc9gaXIuCCRpuoz3Kaz8cUelp3smouvfT632ozg2vrT6lJsHKKOF59YLbOGfAWGUcKEfRMQw==",
+ "license": "MIT",
+ "peerDependencies": {
+ "picomatch": "^3 || ^4"
+ },
+ "peerDependenciesMeta": {
+ "picomatch": {
+ "optional": true
+ }
+ }
+ },
"node_modules/vite/node_modules/fsevents": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
@@ -33313,6 +33373,18 @@
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
+ "node_modules/vite/node_modules/picomatch": {
+ "version": "4.0.2",
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz",
+ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=12"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/jonschlinkert"
+ }
+ },
"node_modules/vitest": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/vitest/-/vitest-3.1.1.tgz",
@@ -34131,6 +34203,27 @@
"node": "^12.13.0 || ^14.15.0 || >=16.0.0"
}
},
+ "node_modules/ws": {
+ "version": "8.18.1",
+ "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.1.tgz",
+ "integrity": "sha512-RKW2aJZMXeMxVpnZ6bck+RswznaxmzdULiBr6KY7XkTnW8uvt0iT9H5DkHUChXrc+uurzwa0rVI16n/Xzjdz1w==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=10.0.0"
+ },
+ "peerDependencies": {
+ "bufferutil": "^4.0.1",
+ "utf-8-validate": ">=5.0.2"
+ },
+ "peerDependenciesMeta": {
+ "bufferutil": {
+ "optional": true
+ },
+ "utf-8-validate": {
+ "optional": true
+ }
+ }
+ },
"node_modules/xstate": {
"version": "4.38.3",
"resolved": "https://registry.npmjs.org/xstate/-/xstate-4.38.3.tgz",
@@ -34248,9 +34341,9 @@
}
},
"node_modules/zod": {
- "version": "3.24.2",
- "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz",
- "integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==",
+ "version": "3.24.3",
+ "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.3.tgz",
+ "integrity": "sha512-HhY1oqzWCQWuUqvBFnsyrtZRhyPeR7SUGv+C4+MsisMuVfSPx8HpwWqH8tRahSlt6M3PiFAcoeFhZAqIXTxoSg==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
@@ -34309,9 +34402,12 @@
"@aws-sdk/client-sagemaker-runtime": "^3.755.0",
"@aws-sdk/client-transcribe": "^3.755.0",
"@aws-sdk/client-transcribe-streaming": "^3.755.0",
+ "@aws-sdk/credential-providers": "^3.755.0",
"@aws-sdk/lib-dynamodb": "^3.755.0",
"@aws-sdk/s3-request-presigner": "^3.755.0",
"@aws-solutions-constructs/aws-cloudfront-s3": "^2.68.0",
+ "@smithy/node-http-handler": "^4.0.4",
+ "aws-amplify": "^6.14.2",
"aws-cdk-lib": "^2.154.1",
"aws-jwt-verify": "^4.0.0",
"constructs": "^10.3.0",
@@ -34320,6 +34416,7 @@
"sanitize-html": "^2.13.0",
"source-map-support": "^0.5.21",
"upsert-slr": "^1.0.4",
+ "ws": "^8.18.0",
"zod": "^3.24.1"
},
"devDependencies": {
diff --git a/packages/cdk/cdk.json b/packages/cdk/cdk.json
index 6144db127..b60cee708 100644
--- a/packages/cdk/cdk.json
+++ b/packages/cdk/cdk.json
@@ -48,6 +48,7 @@
],
"imageGenerationModelIds": ["amazon.nova-canvas-v1:0"],
"videoGenerationModelIds": ["amazon.nova-reel-v1:0"],
+ "speechToSpeechModelIds": ["amazon.nova-sonic-v1:0"],
"endpointNames": [],
"agentEnabled": false,
"searchAgentEnabled": false,
diff --git a/packages/cdk/lambda/speechToSpeechTask.ts b/packages/cdk/lambda/speechToSpeechTask.ts
new file mode 100644
index 000000000..e7e5847f6
--- /dev/null
+++ b/packages/cdk/lambda/speechToSpeechTask.ts
@@ -0,0 +1,546 @@
+import { Amplify } from 'aws-amplify';
+import { events, EventsChannel } from 'aws-amplify/data';
+import { fromNodeProviderChain } from '@aws-sdk/credential-providers';
+import { randomUUID } from 'crypto';
+import {
+ BedrockRuntimeClient,
+ InvokeModelWithBidirectionalStreamCommand,
+ InvokeModelWithBidirectionalStreamInput,
+ InvokeModelWithBidirectionalStreamCommandOutput,
+ ModelStreamErrorException,
+} from '@aws-sdk/client-bedrock-runtime';
+import { NodeHttp2Handler } from '@smithy/node-http-handler';
+import {
+ SpeechToSpeechEventType,
+ SpeechToSpeechEvent,
+ Model,
+} from 'generative-ai-use-cases';
+
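+// The Amplify Events client needs a global WebSocket implementation; in the Node.js Lambda runtime we supply the 'ws' package.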
+Object.assign(global, { WebSocket: require('ws') });
+
+const MAX_AUDIO_INPUT_QUEUE_SIZE = 200;
+const MIN_AUDIO_OUTPUT_QUEUE_SIZE = 10;
+const MAX_AUDIO_OUTPUT_PER_BATCH = 20;
+
+// Flags
+let isActive = false;
+let isProcessingAudio = false;
+let isAudioStarted = false;
+
+// Queues
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+let eventQueue: Array<any> = [];
+let audioInputQueue: string[] = [];
+let audioOutputQueue: string[] = [];
+
+// IDs
+let promptName = randomUUID();
+let audioContentId = randomUUID();
+
+const clearQueue = () => {
+ eventQueue = [];
+ audioInputQueue = [];
+ audioOutputQueue = [];
+};
+
+const initialize = () => {
+ isActive = false;
+ isProcessingAudio = false;
+ isAudioStarted = false;
+
+ clearQueue();
+};
+
+const dispatchEvent = async (
+ channel: EventsChannel,
+ event: SpeechToSpeechEventType,
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ data: any = undefined
+) => {
+ try {
+ await channel.publish({
+ direction: 'btoc',
+ event,
+ data,
+ } as SpeechToSpeechEvent);
+ } catch (e) {
+ console.error(
+ 'Failed to publish the event via channel. The channel might be closed',
+ event,
+ data
+ );
+ }
+};
+
+const enqueueSessionStart = () => {
+ eventQueue.push({
+ event: {
+ sessionStart: {
+ inferenceConfiguration: {
+ maxTokens: 1024,
+ topP: 0.9,
+ temperature: 0.7,
+ },
+ },
+ },
+ });
+};
+
+const enqueuePromptStart = () => {
+ eventQueue.push({
+ event: {
+ promptStart: {
+ promptName,
+ textOutputConfiguration: {
+ mediaType: 'text/plain',
+ },
+ audioOutputConfiguration: {
+ audioType: 'SPEECH',
+ encoding: 'base64',
+ mediaType: 'audio/lpcm',
+ sampleRateHertz: 24000,
+ sampleSizeBits: 16,
+ channelCount: 1,
+ // TODO: avoid hardcoding
+ voiceId: 'tiffany',
+ },
+ },
+ },
+ });
+};
+
+const enqueueSystemPrompt = (prompt: string) => {
+ const contentName = randomUUID();
+
+ eventQueue.push({
+ event: {
+ contentStart: {
+ promptName,
+ contentName,
+ type: 'TEXT',
+ interactive: true,
+ role: 'SYSTEM',
+ textInputConfiguration: {
+ mediaType: 'text/plain',
+ },
+ },
+ },
+ });
+
+ eventQueue.push({
+ event: {
+ textInput: {
+ promptName,
+ contentName,
+ content: prompt,
+ },
+ },
+ });
+
+ eventQueue.push({
+ event: {
+ contentEnd: {
+ promptName,
+ contentName,
+ },
+ },
+ });
+};
+
+const enqueueAudioStart = () => {
+ audioContentId = randomUUID();
+
+ eventQueue.push({
+ event: {
+ contentStart: {
+ promptName,
+ contentName: audioContentId,
+ type: 'AUDIO',
+ interactive: true,
+ role: 'USER',
+ audioInputConfiguration: {
+ audioType: 'SPEECH',
+ encoding: 'base64',
+ mediaType: 'audio/lpcm',
+ sampleRateHertz: 16000,
+ sampleSizeBits: 16,
+ channelCount: 1,
+ },
+ },
+ },
+ });
+
+ isAudioStarted = true;
+};
+
+const enqueuePromptEnd = () => {
+ eventQueue.push({
+ event: {
+ promptEnd: {
+ promptName,
+ },
+ },
+ });
+};
+
+const enqueueSessionEnd = () => {
+ eventQueue.push({
+ event: {
+ sessionEnd: {},
+ },
+ });
+};
+
+const enqueueAudioStop = () => {
+ isAudioStarted = false;
+
+ clearQueue();
+
+ eventQueue.push({
+ event: {
+ contentEnd: {
+ promptName,
+ contentName: audioContentId,
+ },
+ },
+ });
+};
+
+const enqueueAudioInput = (audioInputBase64Array: string[]) => {
+ if (!isAudioStarted || !isActive) {
+ return;
+ }
+
+ for (const audioInput of audioInputBase64Array) {
+ audioInputQueue.push(audioInput);
+ }
+
+ // Audio input queue full, dropping oldest chunk
+ while (audioInputQueue.length - MAX_AUDIO_INPUT_QUEUE_SIZE > 0) {
+ audioInputQueue.shift();
+ }
+
+ if (!isProcessingAudio) {
+ isProcessingAudio = true;
+ // Start audio event loop
+ processAudioQueue();
+ }
+};
+
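+// Buffer audio output chunks and publish them to the Events channel in batches to limit publish calls.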
+const enqueueAudioOutput = async (
+ channel: EventsChannel,
+ audioOutput: string
+) => {
+ audioOutputQueue.push(audioOutput);
+
+ if (audioOutputQueue.length > MIN_AUDIO_OUTPUT_QUEUE_SIZE) {
+ const chunksToProcess: string[] = [];
+
+ let processedChunks = 0;
+
+ while (
+ audioOutputQueue.length > 0 &&
+ processedChunks < MAX_AUDIO_OUTPUT_PER_BATCH
+ ) {
+ const chunk = audioOutputQueue.shift();
+
+ if (chunk) {
+ chunksToProcess.push(chunk);
+ processedChunks += 1;
+ }
+ }
+
+ await dispatchEvent(channel, 'audioOutput', chunksToProcess);
+ }
+};
+
+const forcePublishAudioOutput = async (channel: EventsChannel) => {
+ const chunksToProcess = [];
+
+ while (audioOutputQueue.length > 0) {
+ const chunk = audioOutputQueue.shift();
+ if (chunk) {
+ chunksToProcess.push(chunk);
+ }
+ }
+
+ await dispatchEvent(channel, 'audioOutput', chunksToProcess);
+};
+
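+// Expose the event queue as an async iterable so the Bedrock bidirectional stream can pull events as they are enqueued.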
+const createAsyncIterator = () => {
+ return {
+ [Symbol.asyncIterator]: () => {
+ return {
+ next: async (): Promise<
+ IteratorResult<InvokeModelWithBidirectionalStreamInput>
+ > => {
+ try {
+ while (eventQueue.length === 0 && isActive) {
+ await new Promise((s) => setTimeout(s, 100));
+ }
+
+ const nextEvent = eventQueue.shift();
+
+ if (!nextEvent) {
+ return { value: undefined, done: true };
+ }
+
+ if (nextEvent.event.sessionEnd) {
+ isActive = false;
+ }
+
+ return {
+ value: {
+ chunk: {
+ bytes: new TextEncoder().encode(JSON.stringify(nextEvent)),
+ },
+ },
+ done: false,
+ };
+ } catch (e) {
+ console.error('Error in asyncIterator', e);
+ return { value: undefined, done: true };
+ }
+ },
+ };
+ },
+ return: async () => {
+ return { value: undefined, done: true };
+ },
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ throw: async (error: any) => {
+ console.error(error);
+ throw error;
+ },
+ };
+};
+
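+// Drain queued audio input chunks into the event queue, rescheduling itself while the audio session stays active.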
+const processAudioQueue = async () => {
+ while (audioInputQueue.length > 0 && isAudioStarted && isActive) {
+ const audioChunk = audioInputQueue.shift();
+
+ eventQueue.push({
+ event: {
+ audioInput: {
+ promptName,
+ contentName: audioContentId,
+ content: audioChunk,
+ },
+ },
+ });
+ }
+
+ if (isAudioStarted && isActive) {
+ setTimeout(() => processAudioQueue(), 0);
+ } else {
+ console.log('Audio processing has ended.');
+ isProcessingAudio = false;
+ }
+};
+
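+// Decode each chunk of the bidirectional response stream and forward audio/text events to the client over the Events channel.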
+const processResponseStream = async (
+ channel: EventsChannel,
+ response: InvokeModelWithBidirectionalStreamCommandOutput
+) => {
+ if (!response.body) {
+ throw new Error('Response body is null');
+ }
+
+ for await (const event of response.body) {
+ try {
+ if (event.chunk?.bytes) {
+ const textResponse = new TextDecoder().decode(event.chunk.bytes);
+ const jsonResponse = JSON.parse(textResponse);
+
+ if (jsonResponse.event?.audioOutput) {
+ await enqueueAudioOutput(
+ channel,
+ jsonResponse.event.audioOutput.content
+ );
+ } else if (
+ jsonResponse.event?.contentEnd &&
+ jsonResponse.event?.contentEnd?.type === 'AUDIO'
+ ) {
+ await forcePublishAudioOutput(channel);
+ } else if (
+ jsonResponse.event?.contentStart &&
+ jsonResponse.event?.contentStart?.type === 'TEXT'
+ ) {
+ let generationStage = null;
+
+ if (jsonResponse.event?.contentStart?.additionalModelFields) {
+ generationStage = JSON.parse(
+ jsonResponse.event?.contentStart?.additionalModelFields
+ ).generationStage;
+ }
+
+ await dispatchEvent(channel, 'textStart', {
+ id: jsonResponse.event?.contentStart?.contentId,
+ role: jsonResponse.event?.contentStart?.role?.toLowerCase(),
+ generationStage,
+ });
+ } else if (jsonResponse.event?.textOutput) {
+ await dispatchEvent(channel, 'textOutput', {
+ id: jsonResponse.event?.textOutput?.contentId,
+ role: jsonResponse.event?.textOutput?.role?.toLowerCase(),
+ content: jsonResponse.event?.textOutput?.content,
+ });
+ } else if (
+ jsonResponse.event?.contentEnd &&
+ jsonResponse.event?.contentEnd?.type === 'TEXT'
+ ) {
+ await dispatchEvent(channel, 'textStop', {
+ id: jsonResponse.event?.contentEnd?.contentId,
+ role: jsonResponse.event?.contentEnd?.role?.toLowerCase(),
+ stopReason: jsonResponse.event?.contentEnd?.stopReason,
+ });
+ }
+ }
+ } catch (e) {
+ console.error('Error in processResponseStream', e);
+
+ if (e instanceof ModelStreamErrorException) {
+ console.log('Retrying...');
+ } else {
+ break;
+ }
+ }
+ }
+};
+
+export const handler = async (event: { channelId: string; model: Model }) => {
+ let channel: EventsChannel | null = null;
+
+ try {
+ console.log('event', event);
+
+ initialize();
+
+ isActive = true;
+
+ promptName = randomUUID();
+
+ console.log('promptName', promptName);
+
+ const bedrock = new BedrockRuntimeClient({
+ region: event.model.region,
+ requestHandler: new NodeHttp2Handler({
+ requestTimeout: 300000,
+ sessionTimeout: 300000,
+ disableConcurrentStreams: false,
+ maxConcurrentStreams: 1,
+ }),
+ });
+
+ console.log('Bedrock client initialized');
+
+ Amplify.configure(
+ {
+ API: {
+ Events: {
+ endpoint: process.env.EVENT_API_ENDPOINT!,
+ region: process.env.AWS_DEFAULT_REGION!,
+ defaultAuthMode: 'iam',
+ },
+ },
+ },
+ {
+ Auth: {
+ credentialsProvider: {
+ getCredentialsAndIdentityId: async () => {
+ const provider = fromNodeProviderChain();
+ const credentials = await provider();
+ return {
+ credentials,
+ };
+ },
+ clearCredentialsAndIdentityId: async () => {},
+ },
+ },
+ }
+ );
+
+ console.log('Amplify configured');
+ console.log(
+ `Connect to the channel /${process.env.NAMESPACE}/${event.channelId}`
+ );
+
+ channel = await events.connect(
+ `/${process.env.NAMESPACE}/${event.channelId}`
+ );
+
+ console.log('Connected!');
+
+ channel.subscribe({
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ next: async (data: any) => {
+ const event = data?.event;
+ if (event && event.direction === 'ctob') {
+ if (event.event === 'promptStart') {
+ enqueuePromptStart();
+ } else if (event.event === 'systemPrompt') {
+ enqueueSystemPrompt(event.data);
+ } else if (event.event === 'audioStart') {
+ enqueueAudioStart();
+ } else if (event.event === 'audioInput') {
+ enqueueAudioInput(event.data);
+ } else if (event.event === 'audioStop') {
+ // Currently we accept only a single-turn audio session.
+ // Receiving an 'audioStop' event closes the session.
+ enqueueAudioStop();
+ enqueuePromptEnd();
+ enqueueSessionEnd();
+ }
+ }
+ },
+ error: console.error,
+ });
+
+ console.log('Subscribed to the channel');
+
+ enqueueSessionStart();
+
+ // Without this sleep, the error below is raised
+ // "Subscription has not been initialized"
+ console.log('Sleep...');
+ await new Promise((s) => setTimeout(s, 1000));
+
+ // Notify the status to the client
+ await dispatchEvent(channel, 'ready');
+
+ console.log("I'm ready");
+
+ const asyncIterator = createAsyncIterator();
+
+ console.log('Async iterator created');
+
+ const response = await bedrock.send(
+ new InvokeModelWithBidirectionalStreamCommand({
+ modelId: event.model.modelId,
+ body: asyncIterator,
+ })
+ );
+
+ console.log('Bidirectional stream command sent');
+
+ // Start response stream
+ await processResponseStream(channel, response);
+ } catch (e) {
+ console.error('Error in main process', e);
+ } finally {
+ try {
+ if (channel) {
+ console.log('Sending "end" event...');
+ await dispatchEvent(channel, 'end');
+
+ console.log('Close the channel');
+ channel.close();
+ }
+
+ initialize();
+ console.log('Session ended. All parameters have been initialized.');
+ } catch (e) {
+ console.error('Error during finalization', e);
+ }
+ }
+};
diff --git a/packages/cdk/lambda/startSpeechToSpeechSession.ts b/packages/cdk/lambda/startSpeechToSpeechSession.ts
new file mode 100644
index 000000000..140d5fde3
--- /dev/null
+++ b/packages/cdk/lambda/startSpeechToSpeechSession.ts
@@ -0,0 +1,42 @@
+import { APIGatewayProxyEvent, APIGatewayProxyResult } from 'aws-lambda';
+import {
+ LambdaClient,
+ InvokeCommand,
+ InvocationType,
+} from '@aws-sdk/client-lambda';
+
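+// Asynchronously invoke the long-running speech-to-speech task Lambda and return the channel ID to the caller.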
+export const handler = async (
+ event: APIGatewayProxyEvent
+): Promise<APIGatewayProxyResult> => {
+ try {
+ const { channel, model } = JSON.parse(event.body!);
+ const lambda = new LambdaClient({});
+
+ await lambda.send(
+ new InvokeCommand({
+ FunctionName: process.env.SPEECH_TO_SPEECH_TASK_FUNCTION_ARN,
+ InvocationType: InvocationType.Event,
+ Payload: JSON.stringify({ channelId: channel, model }),
+ })
+ );
+
+ return {
+ statusCode: 200,
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Access-Control-Allow-Origin': '*',
+ },
+ body: JSON.stringify({ channel }),
+ };
+ } catch (error) {
+ console.log(error);
+ return {
+ statusCode: 500,
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Access-Control-Allow-Origin': '*',
+ },
+ body: JSON.stringify({ message: 'Internal Server Error' }),
+ };
+ }
+};
diff --git a/packages/cdk/lib/construct/index.ts b/packages/cdk/lib/construct/index.ts
index 1504de3f2..ff2ec9831 100644
--- a/packages/cdk/lib/construct/index.ts
+++ b/packages/cdk/lib/construct/index.ts
@@ -8,3 +8,4 @@ export * from './common-web-acl';
export * from './agent';
export * from './rag-knowledge-base';
export * from './guardrail';
+export * from './speech-to-speech';
diff --git a/packages/cdk/lib/construct/speech-to-speech.ts b/packages/cdk/lib/construct/speech-to-speech.ts
new file mode 100644
index 000000000..31052cd52
--- /dev/null
+++ b/packages/cdk/lib/construct/speech-to-speech.ts
@@ -0,0 +1,135 @@
+import { Duration } from 'aws-cdk-lib';
+import { Construct } from 'constructs';
+import * as appsync from 'aws-cdk-lib/aws-appsync';
+import * as cognito from 'aws-cdk-lib/aws-cognito';
+import * as agw from 'aws-cdk-lib/aws-apigateway';
+import { NodejsFunction } from 'aws-cdk-lib/aws-lambda-nodejs';
+import { Runtime } from 'aws-cdk-lib/aws-lambda';
+import { Effect, PolicyStatement } from 'aws-cdk-lib/aws-iam';
+import { ModelConfiguration } from 'generative-ai-use-cases';
+import { BEDROCK_SPEECH_TO_SPEECH_MODELS } from '@generative-ai-use-cases/common';
+
+export interface SpeechToSpeechProps {
+ readonly envSuffix: string;
+ readonly userPool: cognito.UserPool;
+ readonly api: agw.RestApi;
+ readonly speechToSpeechModelIds: ModelConfiguration[];
+}
+
+export class SpeechToSpeech extends Construct {
+ public readonly namespace: string;
+ public readonly eventApiEndpoint: string;
+
+ constructor(scope: Construct, id: string, props: SpeechToSpeechProps) {
+ super(scope, id);
+
+ const speechToSpeechModelIds = props.speechToSpeechModelIds;
+
+ for (const model of speechToSpeechModelIds) {
+ if (!BEDROCK_SPEECH_TO_SPEECH_MODELS.includes(model.modelId)) {
+ throw new Error(`Unsupported Model Name: ${model.modelId}`);
+ }
+ }
+
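+ // AppSync Events API that serves as the bidirectional transport between the web client and the task Lambda.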
+ const channelNamespaceName = 'speech-to-speech';
+ const eventApi = new appsync.EventApi(this, 'EventApi', {
+ apiName: `SpeechToSpeech${props.envSuffix}`,
+ authorizationConfig: {
+ authProviders: [
+ {
+ authorizationType: appsync.AppSyncAuthorizationType.IAM,
+ },
+ {
+ authorizationType: appsync.AppSyncAuthorizationType.USER_POOL,
+ cognitoConfig: {
+ userPool: props.userPool,
+ },
+ },
+ ],
+ connectionAuthModeTypes: [
+ appsync.AppSyncAuthorizationType.IAM,
+ appsync.AppSyncAuthorizationType.USER_POOL,
+ ],
+ defaultPublishAuthModeTypes: [
+ appsync.AppSyncAuthorizationType.IAM,
+ appsync.AppSyncAuthorizationType.USER_POOL,
+ ],
+ defaultSubscribeAuthModeTypes: [
+ appsync.AppSyncAuthorizationType.IAM,
+ appsync.AppSyncAuthorizationType.USER_POOL,
+ ],
+ },
+ });
+
+ const namespace = new appsync.ChannelNamespace(this, 'ChannelName', {
+ api: eventApi,
+ channelNamespaceName,
+ });
+
+ const eventApiEndpoint = `https://${eventApi.httpDns}/event`;
+
+ const speechToSpeechTask = new NodejsFunction(this, 'Task', {
+ runtime: Runtime.NODEJS_LATEST,
+ entry: './lambda/speechToSpeechTask.ts',
+ timeout: Duration.minutes(15),
+ environment: {
+ EVENT_API_ENDPOINT: eventApiEndpoint,
+ NAMESPACE: channelNamespaceName,
+ },
+ bundling: {
+ nodeModules: ['@aws-sdk/client-bedrock-runtime'],
+ },
+ memorySize: 512,
+ });
+
+ eventApi.grantConnect(speechToSpeechTask);
+ namespace.grantPublishAndSubscribe(speechToSpeechTask);
+
+ speechToSpeechTask.role?.addToPrincipalPolicy(
+ new PolicyStatement({
+ effect: Effect.ALLOW,
+ resources: ['*'],
+ actions: ['bedrock:*'],
+ })
+ );
+
+ const startSpeechToSpeechSession = new NodejsFunction(
+ this,
+ 'StartSession',
+ {
+ runtime: Runtime.NODEJS_LATEST,
+ entry: './lambda/startSpeechToSpeechSession.ts',
+ timeout: Duration.minutes(15),
+ environment: {
+ SPEECH_TO_SPEECH_TASK_FUNCTION_ARN: speechToSpeechTask.functionArn,
+ },
+ bundling: {
+ nodeModules: ['@aws-sdk/client-bedrock-runtime'],
+ },
+ }
+ );
+
+ speechToSpeechTask.grantInvoke(startSpeechToSpeechSession);
+
+ const authorizer = new agw.CognitoUserPoolsAuthorizer(this, 'Authorizer', {
+ cognitoUserPools: [props.userPool],
+ });
+
+ const commonAuthorizerProps = {
+ authorizationType: agw.AuthorizationType.COGNITO,
+ authorizer,
+ };
+
+ const speechToSpeechResource =
+ props.api.root.addResource('speech-to-speech');
+
+ speechToSpeechResource.addMethod(
+ 'POST',
+ new agw.LambdaIntegration(startSpeechToSpeechSession),
+ commonAuthorizerProps
+ );
+
+ this.namespace = channelNamespaceName;
+ this.eventApiEndpoint = eventApiEndpoint;
+ }
+}
diff --git a/packages/cdk/lib/construct/web.ts b/packages/cdk/lib/construct/web.ts
index 4606c5a4d..079ebf879 100644
--- a/packages/cdk/lib/construct/web.ts
+++ b/packages/cdk/lib/construct/web.ts
@@ -47,6 +47,9 @@ export interface WebProps {
readonly hostedZoneId?: string | null;
readonly useCaseBuilderEnabled: boolean;
readonly hiddenUseCases: HiddenUseCases;
+ readonly speechToSpeechNamespace: string;
+ readonly speechToSpeechEventApiEndpoint: string;
+ readonly speechToSpeechModelIds: ModelConfiguration[];
}
export class Web extends Construct {
@@ -197,6 +200,12 @@ export class Web extends Construct {
VITE_APP_USE_CASE_BUILDER_ENABLED:
props.useCaseBuilderEnabled.toString(),
VITE_APP_HIDDEN_USE_CASES: JSON.stringify(props.hiddenUseCases),
+ VITE_APP_SPEECH_TO_SPEECH_NAMESPACE: props.speechToSpeechNamespace,
+ VITE_APP_SPEECH_TO_SPEECH_EVENT_API_ENDPOINT:
+ props.speechToSpeechEventApiEndpoint,
+ VITE_APP_SPEECH_TO_SPEECH_MODEL_IDS: JSON.stringify(
+ props.speechToSpeechModelIds
+ ),
},
});
// Enhance computing resources
diff --git a/packages/cdk/lib/generative-ai-use-cases-stack.ts b/packages/cdk/lib/generative-ai-use-cases-stack.ts
index 3e0ef4268..c327d403b 100644
--- a/packages/cdk/lib/generative-ai-use-cases-stack.ts
+++ b/packages/cdk/lib/generative-ai-use-cases-stack.ts
@@ -9,6 +9,7 @@ import {
RagKnowledgeBase,
Transcribe,
CommonWebAcl,
+ SpeechToSpeech,
} from './construct';
import { CfnWebACLAssociation } from 'aws-cdk-lib/aws-wafv2';
import * as cognito from 'aws-cdk-lib/aws-cognito';
@@ -106,6 +107,14 @@ export class GenerativeAiUseCasesStack extends Stack {
});
}
+ // SpeechToSpeech (for bidirectional communication)
+ const speechToSpeech = new SpeechToSpeech(this, 'SpeechToSpeech', {
+ envSuffix: params.env,
+ api: api.api,
+ userPool: auth.userPool,
+ speechToSpeechModelIds: params.speechToSpeechModelIds,
+ });
+
// Web Frontend
const web = new Web(this, 'Api', {
// Auth
@@ -135,6 +144,9 @@ export class GenerativeAiUseCasesStack extends Stack {
agentNames: api.agentNames,
inlineAgents: params.inlineAgents,
useCaseBuilderEnabled: params.useCaseBuilderEnabled,
+ speechToSpeechNamespace: speechToSpeech.namespace,
+ speechToSpeechEventApiEndpoint: speechToSpeech.eventApiEndpoint,
+ speechToSpeechModelIds: params.speechToSpeechModelIds,
// Frontend
hiddenUseCases: params.hiddenUseCases,
// Custom Domain
@@ -306,6 +318,18 @@ export class GenerativeAiUseCasesStack extends Stack {
value: JSON.stringify(params.hiddenUseCases),
});
+ new CfnOutput(this, 'SpeechToSpeechNamespace', {
+ value: speechToSpeech.namespace,
+ });
+
+ new CfnOutput(this, 'SpeechToSpeechEventApiEndpoint', {
+ value: speechToSpeech.eventApiEndpoint,
+ });
+
+ new CfnOutput(this, 'SpeechToSpeechModelIds', {
+ value: JSON.stringify(params.speechToSpeechModelIds),
+ });
+
this.userPool = auth.userPool;
this.userPoolClient = auth.client;
diff --git a/packages/cdk/lib/stack-input.ts b/packages/cdk/lib/stack-input.ts
index 3196e0fba..79c289d83 100644
--- a/packages/cdk/lib/stack-input.ts
+++ b/packages/cdk/lib/stack-input.ts
@@ -25,6 +25,7 @@ export const stackInputSchema = z.object({
video: z.boolean().optional(),
videoAnalyzer: z.boolean().optional(),
diagram: z.boolean().optional(),
+ speechToSpeech: z.boolean().optional(),
})
.default({}),
// API
@@ -68,6 +69,17 @@ export const stackInputSchema = z.object({
])
)
.default(['amazon.nova-reel-v1:0']),
+ speechToSpeechModelIds: z
+ .array(
+ z.union([
+ z.string(),
+ z.object({
+ modelId: z.string(),
+ region: z.string(),
+ }),
+ ])
+ )
+ .default(['amazon.nova-sonic-v1:0']),
endpointNames: z.array(z.string()).default([]),
crossAccountBedrockRoleArn: z.string().nullish(),
// RAG
@@ -165,6 +177,12 @@ export const processedStackInputSchema = stackInputSchema.extend({
region: z.string(),
})
),
+ speechToSpeechModelIds: z.array(
+ z.object({
+ modelId: z.string(),
+ region: z.string(),
+ })
+ ),
});
export type StackInput = z.infer<typeof stackInputSchema>;
diff --git a/packages/cdk/package.json b/packages/cdk/package.json
index 7898d44a4..c1a3a8a48 100644
--- a/packages/cdk/package.json
+++ b/packages/cdk/package.json
@@ -37,17 +37,21 @@
"@aws-sdk/client-sagemaker-runtime": "^3.755.0",
"@aws-sdk/client-transcribe": "^3.755.0",
"@aws-sdk/client-transcribe-streaming": "^3.755.0",
+ "@aws-sdk/credential-providers": "^3.755.0",
"@aws-sdk/lib-dynamodb": "^3.755.0",
"@aws-sdk/s3-request-presigner": "^3.755.0",
"@aws-solutions-constructs/aws-cloudfront-s3": "^2.68.0",
+ "aws-amplify": "^6.14.2",
"aws-cdk-lib": "^2.154.1",
"aws-jwt-verify": "^4.0.0",
"constructs": "^10.3.0",
"deploy-time-build": "^0.3.17",
"node-html-parser": "^6.1.13",
"sanitize-html": "^2.13.0",
+ "@smithy/node-http-handler": "^4.0.4",
"source-map-support": "^0.5.21",
"upsert-slr": "^1.0.4",
+ "ws": "^8.18.0",
"zod": "^3.24.1"
}
}
diff --git a/packages/cdk/parameter.ts b/packages/cdk/parameter.ts
index 535c1f599..c1b03ad67 100644
--- a/packages/cdk/parameter.ts
+++ b/packages/cdk/parameter.ts
@@ -67,5 +67,9 @@ export const getParams = (app: cdk.App): ProcessedStackInput => {
params.videoGenerationModelIds,
params.modelRegion
),
+ speechToSpeechModelIds: convertToModelConfiguration(
+ params.speechToSpeechModelIds,
+ params.modelRegion
+ ),
};
};
diff --git a/packages/cdk/test/__snapshots__/generative-ai-use-cases.test.ts.snap b/packages/cdk/test/__snapshots__/generative-ai-use-cases.test.ts.snap
index 968a1dd67..f8bf9ccfe 100644
--- a/packages/cdk/test/__snapshots__/generative-ai-use-cases.test.ts.snap
+++ b/packages/cdk/test/__snapshots__/generative-ai-use-cases.test.ts.snap
@@ -2041,6 +2041,29 @@ exports[`GenerativeAiUseCases matches the snapshot 5`] = `
"SelfSignUpEnabled": {
"Value": "true",
},
+ "SpeechToSpeechEventApiEndpoint": {
+ "Value": {
+ "Fn::Join": [
+ "",
+ [
+ "https://",
+ {
+ "Fn::GetAtt": [
+ "SpeechToSpeechEventApi1E2E9AB4",
+ "Dns.Http",
+ ],
+ },
+ "/event",
+ ],
+ ],
+ },
+ },
+ "SpeechToSpeechModelIds": {
+ "Value": "[{"modelId":"amazon.nova-sonic-v1:0","region":"us-east-1"}]",
+ },
+ "SpeechToSpeechNamespace": {
+ "Value": "speech-to-speech",
+ },
"UseCaseBuilderEnabled": {
"Value": "true",
},
@@ -2159,7 +2182,7 @@ exports[`GenerativeAiUseCases matches the snapshot 5`] = `
"Type": "AWS::IAM::Role",
"UpdateReplacePolicy": "Delete",
},
- "APIApiDeployment3A502123d75c6f1f887fef40596e8e44b7c34d5d": {
+ "APIApiDeployment3A502123457785fef82d1521cf56189f5ab6294b": {
"DeletionPolicy": "Delete",
"DependsOn": [
"APIApiApi4XXDCF913C8",
@@ -2230,6 +2253,9 @@ exports[`GenerativeAiUseCases matches the snapshot 5`] = `
"APIApisharesshareshareId3696CDAF",
"APIApisharesshareOPTIONS6D42A67D",
"APIApisharesshareF2EC0273",
+ "APIApispeechtospeechOPTIONS92DBE1B9",
+ "APIApispeechtospeechPOST8D76474A",
+ "APIApispeechtospeechD6FA255B",
"APIApisystemcontextssystemContextIdDELETEB527E743",
"APIApisystemcontextssystemContextIdOPTIONS96E8D02C",
"APIApisystemcontextssystemContextId9D6F9E56",
@@ -2306,7 +2332,7 @@ exports[`GenerativeAiUseCases matches the snapshot 5`] = `
],
"Properties": {
"DeploymentId": {
- "Ref": "APIApiDeployment3A502123d75c6f1f887fef40596e8e44b7c34d5d",
+ "Ref": "APIApiDeployment3A502123457785fef82d1521cf56189f5ab6294b",
},
"RestApiId": {
"Ref": "APIApiFFA96F67",
@@ -5869,6 +5895,172 @@ exports[`GenerativeAiUseCases matches the snapshot 5`] = `
"Type": "AWS::ApiGateway::Method",
"UpdateReplacePolicy": "Delete",
},
+ "APIApispeechtospeechD6FA255B": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "ParentId": {
+ "Fn::GetAtt": [
+ "APIApiFFA96F67",
+ "RootResourceId",
+ ],
+ },
+ "PathPart": "speech-to-speech",
+ "RestApiId": {
+ "Ref": "APIApiFFA96F67",
+ },
+ },
+ "Type": "AWS::ApiGateway::Resource",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "APIApispeechtospeechOPTIONS92DBE1B9": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "ApiKeyRequired": false,
+ "AuthorizationType": "NONE",
+ "HttpMethod": "OPTIONS",
+ "Integration": {
+ "IntegrationResponses": [
+ {
+ "ResponseParameters": {
+ "method.response.header.Access-Control-Allow-Headers": "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token,X-Amz-User-Agent'",
+ "method.response.header.Access-Control-Allow-Methods": "'OPTIONS,GET,PUT,POST,DELETE,PATCH,HEAD'",
+ "method.response.header.Access-Control-Allow-Origin": "'*'",
+ },
+ "StatusCode": "204",
+ },
+ ],
+ "RequestTemplates": {
+ "application/json": "{ statusCode: 200 }",
+ },
+ "Type": "MOCK",
+ },
+ "MethodResponses": [
+ {
+ "ResponseParameters": {
+ "method.response.header.Access-Control-Allow-Headers": true,
+ "method.response.header.Access-Control-Allow-Methods": true,
+ "method.response.header.Access-Control-Allow-Origin": true,
+ },
+ "StatusCode": "204",
+ },
+ ],
+ "ResourceId": {
+ "Ref": "APIApispeechtospeechD6FA255B",
+ },
+ "RestApiId": {
+ "Ref": "APIApiFFA96F67",
+ },
+ },
+ "Type": "AWS::ApiGateway::Method",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "APIApispeechtospeechPOST8D76474A": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "AuthorizationType": "COGNITO_USER_POOLS",
+ "AuthorizerId": {
+ "Ref": "SpeechToSpeechAuthorizerF61277A4",
+ },
+ "HttpMethod": "POST",
+ "Integration": {
+ "IntegrationHttpMethod": "POST",
+ "Type": "AWS_PROXY",
+ "Uri": {
+ "Fn::Join": [
+ "",
+ [
+ "arn:",
+ {
+ "Ref": "AWS::Partition",
+ },
+ ":apigateway:us-east-1:lambda:path/2015-03-31/functions/",
+ {
+ "Fn::GetAtt": [
+ "SpeechToSpeechStartSession80A7495E",
+ "Arn",
+ ],
+ },
+ "/invocations",
+ ],
+ ],
+ },
+ },
+ "ResourceId": {
+ "Ref": "APIApispeechtospeechD6FA255B",
+ },
+ "RestApiId": {
+ "Ref": "APIApiFFA96F67",
+ },
+ },
+ "Type": "AWS::ApiGateway::Method",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "APIApispeechtospeechPOSTApiPermissionGenerativeAiUseCasesStackAPIApi89219E17POSTspeechtospeech0FB686CB": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "Action": "lambda:InvokeFunction",
+ "FunctionName": {
+ "Fn::GetAtt": [
+ "SpeechToSpeechStartSession80A7495E",
+ "Arn",
+ ],
+ },
+ "Principal": "apigateway.amazonaws.com",
+ "SourceArn": {
+ "Fn::Join": [
+ "",
+ [
+ "arn:",
+ {
+ "Ref": "AWS::Partition",
+ },
+ ":execute-api:us-east-1:123456890123:",
+ {
+ "Ref": "APIApiFFA96F67",
+ },
+ "/",
+ {
+ "Ref": "APIApiDeploymentStageapiCD55D117",
+ },
+ "/POST/speech-to-speech",
+ ],
+ ],
+ },
+ },
+ "Type": "AWS::Lambda::Permission",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "APIApispeechtospeechPOSTApiPermissionTestGenerativeAiUseCasesStackAPIApi89219E17POSTspeechtospeech2C9E93F5": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "Action": "lambda:InvokeFunction",
+ "FunctionName": {
+ "Fn::GetAtt": [
+ "SpeechToSpeechStartSession80A7495E",
+ "Arn",
+ ],
+ },
+ "Principal": "apigateway.amazonaws.com",
+ "SourceArn": {
+ "Fn::Join": [
+ "",
+ [
+ "arn:",
+ {
+ "Ref": "AWS::Partition",
+ },
+ ":execute-api:us-east-1:123456890123:",
+ {
+ "Ref": "APIApiFFA96F67",
+ },
+ "/test-invoke-stage/POST/speech-to-speech",
+ ],
+ ],
+ },
+ },
+ "Type": "AWS::Lambda::Permission",
+ "UpdateReplacePolicy": "Delete",
+ },
"APIApisystemcontexts57785227": {
"DeletionPolicy": "Delete",
"Properties": {
@@ -13104,6 +13296,23 @@ exports[`GenerativeAiUseCases matches the snapshot 5`] = `
"VITE_APP_SAML_COGNITO_DOMAIN_NAME": "",
"VITE_APP_SAML_COGNITO_FEDERATED_IDENTITY_PROVIDER_NAME": "",
"VITE_APP_SELF_SIGN_UP_ENABLED": "true",
+ "VITE_APP_SPEECH_TO_SPEECH_EVENT_API_ENDPOINT": {
+ "Fn::Join": [
+ "",
+ [
+ "https://",
+ {
+ "Fn::GetAtt": [
+ "SpeechToSpeechEventApi1E2E9AB4",
+ "Dns.Http",
+ ],
+ },
+ "/event",
+ ],
+ ],
+ },
+ "VITE_APP_SPEECH_TO_SPEECH_MODEL_IDS": "[{"modelId":"amazon.nova-sonic-v1:0","region":"us-east-1"}]",
+ "VITE_APP_SPEECH_TO_SPEECH_NAMESPACE": "speech-to-speech",
"VITE_APP_USER_POOL_CLIENT_ID": {
"Ref": "AuthUserPoolclientA74673A9",
},
@@ -15108,7 +15317,7 @@ exports[`GenerativeAiUseCases matches the snapshot 5`] = `
"Arn",
],
},
- "Runtime": "nodejs18.x",
+ "Runtime": "nodejs20.x",
"Timeout": 300,
},
"Type": "AWS::Lambda::Function",
@@ -16086,6 +16295,349 @@ exports[`GenerativeAiUseCases matches the snapshot 5`] = `
"Type": "AWS::WAFv2::WebACL",
"UpdateReplacePolicy": "Delete",
},
+ "SpeechToSpeechAuthorizerF61277A4": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "IdentitySource": "method.request.header.Authorization",
+ "Name": "GenerativeAiUseCasesStackSpeechToSpeechAuthorizerC597101F",
+ "ProviderARNs": [
+ {
+ "Fn::GetAtt": [
+ "AuthUserPool8115E87F",
+ "Arn",
+ ],
+ },
+ ],
+ "RestApiId": {
+ "Ref": "APIApiFFA96F67",
+ },
+ "Type": "COGNITO_USER_POOLS",
+ },
+ "Type": "AWS::ApiGateway::Authorizer",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "SpeechToSpeechChannelNameCA32A058": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "ApiId": {
+ "Fn::GetAtt": [
+ "SpeechToSpeechEventApi1E2E9AB4",
+ "ApiId",
+ ],
+ },
+ "Name": "speech-to-speech",
+ },
+ "Type": "AWS::AppSync::ChannelNamespace",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "SpeechToSpeechEventApi1E2E9AB4": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "EventConfig": {
+ "AuthProviders": [
+ {
+ "AuthType": "AWS_IAM",
+ },
+ {
+ "AuthType": "AMAZON_COGNITO_USER_POOLS",
+ "CognitoConfig": {
+ "AwsRegion": "us-east-1",
+ "UserPoolId": {
+ "Ref": "AuthUserPool8115E87F",
+ },
+ },
+ },
+ ],
+ "ConnectionAuthModes": [
+ {
+ "AuthType": "AWS_IAM",
+ },
+ {
+ "AuthType": "AMAZON_COGNITO_USER_POOLS",
+ },
+ ],
+ "DefaultPublishAuthModes": [
+ {
+ "AuthType": "AWS_IAM",
+ },
+ {
+ "AuthType": "AMAZON_COGNITO_USER_POOLS",
+ },
+ ],
+ "DefaultSubscribeAuthModes": [
+ {
+ "AuthType": "AWS_IAM",
+ },
+ {
+ "AuthType": "AMAZON_COGNITO_USER_POOLS",
+ },
+ ],
+ },
+ "Name": "SpeechToSpeech",
+ },
+ "Type": "AWS::AppSync::Api",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "SpeechToSpeechStartSession80A7495E": {
+ "DeletionPolicy": "Delete",
+ "DependsOn": [
+ "SpeechToSpeechStartSessionServiceRoleDefaultPolicy4D6D3AC7",
+ "SpeechToSpeechStartSessionServiceRoleEBE56984",
+ ],
+ "Properties": {
+ "Code": {
+ "S3Bucket": "cdk-hnb659fds-assets-123456890123-us-east-1",
+ "S3Key": "HASH-REPLACED.zip",
+ },
+ "Environment": {
+ "Variables": {
+ "SPEECH_TO_SPEECH_TASK_FUNCTION_ARN": {
+ "Fn::GetAtt": [
+ "SpeechToSpeechTaskC1149BF3",
+ "Arn",
+ ],
+ },
+ },
+ },
+ "Handler": "index.handler",
+ "Role": {
+ "Fn::GetAtt": [
+ "SpeechToSpeechStartSessionServiceRoleEBE56984",
+ "Arn",
+ ],
+ },
+ "Runtime": "nodejs18.x",
+ "Timeout": 900,
+ },
+ "Type": "AWS::Lambda::Function",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "SpeechToSpeechStartSessionServiceRoleDefaultPolicy4D6D3AC7": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "PolicyDocument": {
+ "Statement": [
+ {
+ "Action": "lambda:InvokeFunction",
+ "Effect": "Allow",
+ "Resource": [
+ {
+ "Fn::GetAtt": [
+ "SpeechToSpeechTaskC1149BF3",
+ "Arn",
+ ],
+ },
+ {
+ "Fn::Join": [
+ "",
+ [
+ {
+ "Fn::GetAtt": [
+ "SpeechToSpeechTaskC1149BF3",
+ "Arn",
+ ],
+ },
+ ":*",
+ ],
+ ],
+ },
+ ],
+ },
+ ],
+ "Version": "2012-10-17",
+ },
+ "PolicyName": "SpeechToSpeechStartSessionServiceRoleDefaultPolicy4D6D3AC7",
+ "Roles": [
+ {
+ "Ref": "SpeechToSpeechStartSessionServiceRoleEBE56984",
+ },
+ ],
+ },
+ "Type": "AWS::IAM::Policy",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "SpeechToSpeechStartSessionServiceRoleEBE56984": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "AssumeRolePolicyDocument": {
+ "Statement": [
+ {
+ "Action": "sts:AssumeRole",
+ "Effect": "Allow",
+ "Principal": {
+ "Service": "lambda.amazonaws.com",
+ },
+ },
+ ],
+ "Version": "2012-10-17",
+ },
+ "ManagedPolicyArns": [
+ {
+ "Fn::Join": [
+ "",
+ [
+ "arn:",
+ {
+ "Ref": "AWS::Partition",
+ },
+ ":iam::aws:policy/service-role/AWSLambdaBasicExecutionRole",
+ ],
+ ],
+ },
+ ],
+ },
+ "Type": "AWS::IAM::Role",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "SpeechToSpeechTaskC1149BF3": {
+ "DeletionPolicy": "Delete",
+ "DependsOn": [
+ "SpeechToSpeechTaskServiceRoleDefaultPolicy1048205C",
+ "SpeechToSpeechTaskServiceRole6B9DD524",
+ ],
+ "Properties": {
+ "Code": {
+ "S3Bucket": "cdk-hnb659fds-assets-123456890123-us-east-1",
+ "S3Key": "HASH-REPLACED.zip",
+ },
+ "Environment": {
+ "Variables": {
+ "EVENT_API_ENDPOINT": {
+ "Fn::Join": [
+ "",
+ [
+ "https://",
+ {
+ "Fn::GetAtt": [
+ "SpeechToSpeechEventApi1E2E9AB4",
+ "Dns.Http",
+ ],
+ },
+ "/event",
+ ],
+ ],
+ },
+ "NAMESPACE": "speech-to-speech",
+ },
+ },
+ "Handler": "index.handler",
+ "MemorySize": 512,
+ "Role": {
+ "Fn::GetAtt": [
+ "SpeechToSpeechTaskServiceRole6B9DD524",
+ "Arn",
+ ],
+ },
+ "Runtime": "nodejs18.x",
+ "Timeout": 900,
+ },
+ "Type": "AWS::Lambda::Function",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "SpeechToSpeechTaskServiceRole6B9DD524": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "AssumeRolePolicyDocument": {
+ "Statement": [
+ {
+ "Action": "sts:AssumeRole",
+ "Effect": "Allow",
+ "Principal": {
+ "Service": "lambda.amazonaws.com",
+ },
+ },
+ ],
+ "Version": "2012-10-17",
+ },
+ "ManagedPolicyArns": [
+ {
+ "Fn::Join": [
+ "",
+ [
+ "arn:",
+ {
+ "Ref": "AWS::Partition",
+ },
+ ":iam::aws:policy/service-role/AWSLambdaBasicExecutionRole",
+ ],
+ ],
+ },
+ ],
+ },
+ "Type": "AWS::IAM::Role",
+ "UpdateReplacePolicy": "Delete",
+ },
+ "SpeechToSpeechTaskServiceRoleDefaultPolicy1048205C": {
+ "DeletionPolicy": "Delete",
+ "Properties": {
+ "PolicyDocument": {
+ "Statement": [
+ {
+ "Action": "appsync:EventConnect",
+ "Effect": "Allow",
+ "Resource": {
+ "Fn::Join": [
+ "",
+ [
+ "arn:",
+ {
+ "Ref": "AWS::Partition",
+ },
+ ":appsync:us-east-1:123456890123:apis/",
+ {
+ "Fn::GetAtt": [
+ "SpeechToSpeechEventApi1E2E9AB4",
+ "ApiId",
+ ],
+ },
+ ],
+ ],
+ },
+ },
+ {
+ "Action": [
+ "appsync:EventPublish",
+ "appsync:EventSubscribe",
+ ],
+ "Effect": "Allow",
+ "Resource": {
+ "Fn::Join": [
+ "",
+ [
+ "arn:",
+ {
+ "Ref": "AWS::Partition",
+ },
+ ":appsync:us-east-1:123456890123:apis/",
+ {
+ "Fn::GetAtt": [
+ "SpeechToSpeechEventApi1E2E9AB4",
+ "ApiId",
+ ],
+ },
+ "/channelNamespace/speech-to-speech",
+ ],
+ ],
+ },
+ },
+ {
+ "Action": "bedrock:*",
+ "Effect": "Allow",
+ "Resource": "*",
+ },
+ ],
+ "Version": "2012-10-17",
+ },
+ "PolicyName": "SpeechToSpeechTaskServiceRoleDefaultPolicy1048205C",
+ "Roles": [
+ {
+ "Ref": "SpeechToSpeechTaskServiceRole6B9DD524",
+ },
+ ],
+ },
+ "Type": "AWS::IAM::Policy",
+ "UpdateReplacePolicy": "Delete",
+ },
"TranscribeAudioBucket39DFF290": {
"DeletionPolicy": "Delete",
"Properties": {
diff --git a/packages/cdk/test/generative-ai-use-cases.test.ts b/packages/cdk/test/generative-ai-use-cases.test.ts
index 2000e5f75..3a526cbab 100644
--- a/packages/cdk/test/generative-ai-use-cases.test.ts
+++ b/packages/cdk/test/generative-ai-use-cases.test.ts
@@ -41,6 +41,9 @@ describe('GenerativeAiUseCases', () => {
videoGenerationModelIds: [
{ modelId: 'amazon.nova-reel-v1:0', region: 'us-east-1' },
],
+ speechToSpeechModelIds: [
+ { modelId: 'amazon.nova-sonic-v1:0', region: 'us-east-1' },
+ ],
endpointNames: [],
agentEnabled: true,
searchAgentEnabled: true,
diff --git a/packages/common/src/application/model.ts b/packages/common/src/application/model.ts
index db866fbed..c481ffd8c 100644
--- a/packages/common/src/application/model.ts
+++ b/packages/common/src/application/model.ts
@@ -19,6 +19,7 @@ const MODEL_FEATURE: Record<string, FeatureFlags> = {
VIDEO_GEN: { video_gen: true },
EMBEDDING: { embedding: true },
RERANKING: { reranking: true },
+ SPEECH_TO_SPEECH: { speechToSpeech: true },
// Additional Flags
LIGHT: { light: true },
};
@@ -182,6 +183,11 @@ export const modelFeatureFlags: Record<string, FeatureFlags> = {
'amazon.rerank-v1:0': MODEL_FEATURE.RERANKING,
// Cohere
'cohere.rerank-v3-5:0': MODEL_FEATURE.RERANKING,
+
+ // === Speech to Speech ===
+
+ // Amazon
+ 'amazon.nova-sonic-v1:0': MODEL_FEATURE.SPEECH_TO_SPEECH,
};
export const BEDROCK_TEXT_MODELS = Object.keys(modelFeatureFlags).filter(
@@ -199,3 +205,6 @@ export const BEDROCK_EMBEDDING_MODELS = Object.keys(modelFeatureFlags).filter(
export const BEDROCK_RERANKING_MODELS = Object.keys(modelFeatureFlags).filter(
(model) => modelFeatureFlags[model].reranking
);
+export const BEDROCK_SPEECH_TO_SPEECH_MODELS = Object.keys(
+ modelFeatureFlags
+).filter((model) => modelFeatureFlags[model].speechToSpeech);
diff --git a/packages/types/src/index.d.ts b/packages/types/src/index.d.ts
index dab578640..80e9cff45 100644
--- a/packages/types/src/index.d.ts
+++ b/packages/types/src/index.d.ts
@@ -15,3 +15,4 @@ export * from './model';
export * from './rag-knowledge-base';
export * from './useCases';
export * from './share';
+export * from './speech-to-speech';
diff --git a/packages/types/src/model.d.ts b/packages/types/src/model.d.ts
index b740e29c3..b4fa2f3a3 100644
--- a/packages/types/src/model.d.ts
+++ b/packages/types/src/model.d.ts
@@ -12,6 +12,9 @@ export type FeatureFlags = {
embedding?: boolean;
reranking?: boolean;
+
+ speechToSpeech?: boolean;
+
// Additional Flags
light?: boolean;
};
diff --git a/packages/types/src/speech-to-speech.d.ts b/packages/types/src/speech-to-speech.d.ts
new file mode 100644
index 000000000..627255605
--- /dev/null
+++ b/packages/types/src/speech-to-speech.d.ts
@@ -0,0 +1,22 @@
+// ctob: client (web) to bedrock (api)
+// btoc: bedrock (api) to client (web)
+export type SpeechToSpeechEventDirection = 'ctob' | 'btoc';
+
+export type SpeechToSpeechEventType =
+ | 'ready'
+ | 'end'
+ | 'promptStart'
+ | 'systemPrompt'
+ | 'audioStart'
+ | 'audioInput'
+ | 'audioStop'
+ | 'audioOutput'
+ | 'textStart'
+ | 'textOutput'
+ | 'textStop';
+
+export type SpeechToSpeechEvent = {
+ direction: SpeechToSpeechEventDirection;
+ event: SpeechToSpeechEventType;
+ data: any;
+};
diff --git a/packages/types/src/useCases.d.ts b/packages/types/src/useCases.d.ts
index b83bca1f9..bc8fc389c 100644
--- a/packages/types/src/useCases.d.ts
+++ b/packages/types/src/useCases.d.ts
@@ -8,6 +8,7 @@ export type HiddenUseCases = {
video?: boolean;
videoAnalyzer?: boolean;
diagram?: boolean;
+ voiceChat?: boolean;
};
export type HiddenUseCasesKeys = keyof HiddenUseCases;
diff --git a/packages/web/public/locales/translation/en.yaml b/packages/web/public/locales/translation/en.yaml
index b4e8fe0da..06dba94ae 100644
--- a/packages/web/public/locales/translation/en.yaml
+++ b/packages/web/public/locales/translation/en.yaml
@@ -582,6 +582,12 @@ landing:
description: >-
Video generation AI can create short videos based on text or images. The generated videos can be used as material in various scenes.
title: Video Generation
+ voice_chat:
+ description: >-
+ In Voice Chat, you can have a bidirectional voice chat with generative AI.
+ Similar to natural conversation, you can also interrupt and speak while the AI is talking.
+ Also, by setting a system prompt, you can have voice conversations with AI that has specific roles.
+ title: Voice Chat
web_content:
description: >-
Extract web content such as blogs and documents. LLMs remove unnecessary information and format it into a coherent text. The extracted content can be used in other use cases such as summarization and translation.
@@ -629,6 +635,7 @@ navigation:
translation: Translation
videoAnalysis: Video Analysis
videoGeneration: Video Generation
+ voiceChat: Voice Chat
webContentExtraction: Web Content Extraction
writing: Writing
notfound:
@@ -1101,6 +1108,15 @@ videoAnalyzer:
start: Start
stop: Stop
title: Video Analysis
+voiceChat:
+ close: Close session
+ default_system_prompt: You are an AI assistant.
+ experimental_warning: >-
+ Voice Chat is still in an experimental stage. The architecture, etc. may be changed in the future. Conversation history will not be saved. The supported language is English only.
+ experimental_warning_title: About Voice Chat
+ im_listening: I'm listening...
+ start: Start new session
+ title: Voice Chat
webcontent:
additional_context: Additional context
additional_context_placeholder: 'You can enter additional points to consider (e.g., summarize)'
diff --git a/packages/web/public/locales/translation/ja.yaml b/packages/web/public/locales/translation/ja.yaml
index 07c86a7f5..3bb168868 100644
--- a/packages/web/public/locales/translation/ja.yaml
+++ b/packages/web/public/locales/translation/ja.yaml
@@ -498,6 +498,12 @@ landing:
description: >-
動画生成 AI はテキストから短い動画を生成します。生成した動画は素材としてさまざまなシーンで活用できます。
title: 動画生成
+ voice_chat:
+ description: >-
+ 音声チャットでは生成 AI と双方向の音声によるチャットが可能です。
+ 自然な会話と同様、AI の発言中に割り込んで話すこともできます。
+ また、システムプロンプトを設定することで、特定の役割を持った AI と音声で会話することもできます。
+ title: 音声チャット
web_content:
description: >-
ブログやドキュメントなどの Web コンテンツを抽出します。LLM
@@ -548,6 +554,7 @@ navigation:
translation: 翻訳
videoAnalysis: 映像分析
videoGeneration: 動画生成
+ voiceChat: 音声チャット
webContentExtraction: Web コンテンツ抽出
writing: 執筆
notfound:
@@ -964,6 +971,15 @@ videoAnalyzer:
start: 開始
stop: 停止
title: 映像分析
+voiceChat:
+ close: セッションを終了する
+ default_system_prompt: You are an AI assistant.
+ experimental_warning: >-
+ 音声チャットはまだ実験的な段階です。アーキテクチャ等は今後変更される可能性があります。会話履歴は保存されません。対応言語は英語のみです。
+ experimental_warning_title: 音声チャットについて
+ im_listening: 発言してください
+ start: セッションを始める
+ title: 音声チャット
webcontent:
additional_context: 追加コンテキスト
additional_context_placeholder: '追加で考慮してほしい点を入力することができます(例: 要約して)'
diff --git a/packages/web/src/App.tsx b/packages/web/src/App.tsx
index cae7dba39..84b69defc 100644
--- a/packages/web/src/App.tsx
+++ b/packages/web/src/App.tsx
@@ -19,6 +19,7 @@ import {
PiVideoCamera,
PiFlowArrow,
PiMagicWand,
+ PiMicrophoneBold,
PiTreeStructure,
} from 'react-icons/pi';
import { Outlet } from 'react-router-dom';
@@ -44,6 +45,7 @@ const {
visionEnabled,
imageGenModelIds,
videoGenModelIds,
+ speechToSpeechModelIds,
agentNames,
flowChatEnabled,
} = MODELS;
@@ -131,6 +133,14 @@ const App: React.FC = () => {
display: 'usecase' as const,
}
: null,
+ speechToSpeechModelIds.length > 0 && enabled('voiceChat')
+ ? {
+ label: t('navigation.voiceChat'),
+ to: '/voice-chat',
+ icon: <PiMicrophoneBold />,
+ display: 'usecase' as const,
+ }
+ : null,
enabled('generate')
? {
label: t('navigation.textGeneration'),
diff --git a/packages/web/src/components/AuthWithSAML.tsx b/packages/web/src/components/AuthWithSAML.tsx
index f580fc0e7..962e358f9 100644
--- a/packages/web/src/components/AuthWithSAML.tsx
+++ b/packages/web/src/components/AuthWithSAML.tsx
@@ -9,6 +9,8 @@ const samlCognitoDomainName: string = import.meta.env
.VITE_APP_SAML_COGNITO_DOMAIN_NAME;
const samlCognitoFederatedIdentityProviderName: string = import.meta.env
.VITE_APP_SAML_COGNITO_FEDERATED_IDENTITY_PROVIDER_NAME;
+const speechToSpeechEventApiEndpoint: string = import.meta.env
+ .VITE_APP_SPEECH_TO_SPEECH_EVENT_API_ENDPOINT;
type Props = {
children: React.ReactNode;
@@ -61,6 +63,13 @@ const AuthWithSAML: React.FC<Props> = (props) => {
},
},
},
+ API: {
+ Events: {
+ endpoint: speechToSpeechEventApiEndpoint,
+ region: import.meta.env.VITE_APP_REGION,
+ defaultAuthMode: 'userPool',
+ },
+ },
});
return (
diff --git a/packages/web/src/components/AuthWithUserpool.tsx b/packages/web/src/components/AuthWithUserpool.tsx
index 3d39ead25..a9185dd67 100644
--- a/packages/web/src/components/AuthWithUserpool.tsx
+++ b/packages/web/src/components/AuthWithUserpool.tsx
@@ -6,6 +6,8 @@ import { useTranslation } from 'react-i18next';
const selfSignUpEnabled: boolean =
import.meta.env.VITE_APP_SELF_SIGN_UP_ENABLED === 'true';
+const speechToSpeechEventApiEndpoint: string = import.meta.env
+ .VITE_APP_SPEECH_TO_SPEECH_EVENT_API_ENDPOINT;
type Props = {
children: React.ReactNode;
@@ -21,6 +23,13 @@ const AuthWithUserpool: React.FC<Props> = (props) => {
identityPoolId: import.meta.env.VITE_APP_IDENTITY_POOL_ID,
},
},
+ API: {
+ Events: {
+ endpoint: speechToSpeechEventApiEndpoint,
+ region: import.meta.env.VITE_APP_REGION,
+ defaultAuthMode: 'userPool',
+ },
+ },
});
I18n.putVocabularies(translations);
diff --git a/packages/web/src/components/ChatMessage.tsx b/packages/web/src/components/ChatMessage.tsx
index bdab0f976..b2682c547 100644
--- a/packages/web/src/components/ChatMessage.tsx
+++ b/packages/web/src/components/ChatMessage.tsx
@@ -27,6 +27,7 @@ type Props = BaseProps & {
chatContent?: ShownMessage;
loading?: boolean;
hideFeedback?: boolean;
+ hideSaveSystemContext?: boolean;
setSaveSystemContext?: (s: string) => void;
setShowSystemContextModal?: (value: boolean) => void;
allowRetry?: boolean;
@@ -236,7 +237,7 @@ const ChatMessage: React.FC<Props> = (props) => {