Skip to content

Commit f224fc4

Browse files
Brandon Lei
Brandon Lei
authored and
Brandon Lei
committed
finished
1 parent 05ca6a3 commit f224fc4

File tree

3 files changed

+2
-40
lines changed

3 files changed

+2
-40
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ This FastAPI backend handles speech-to-text, natural language processing, and te
88
### File Structure
99
```
1010
.
11-
├── main.py # Main application file
11+
├── main.py # Main application file
1212
├── test.py # Test file generator
1313
├── .env # Environment variables
1414
└── README.md # This file

main.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from pydantic import BaseModel
44
from typing import Optional
55
import azure.cognitiveservices.speech as speechsdk
6-
import openai
76
from openai import OpenAI
87
import pyaudio
98
import wave
@@ -47,11 +46,7 @@ class VoiceAssistant:
4746
def __init__(self):
4847
self.conversation_history = []
4948
self.temp_dir = tempfile.mkdtemp()
50-
51-
# Initialize OpenAI client
5249
self.openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
53-
54-
# Initialize Azure Speech config
5550
self.speech_config = speechsdk.SpeechConfig(
5651
subscription=azure_speech_key,
5752
region=azure_service_region
@@ -82,7 +77,6 @@ async def record_audio(self) -> bytes:
8277
data = stream.read(self.CHUNK)
8378
frames.append(data)
8479

85-
# Save to temporary WAV file
8680
temp_path = os.path.join(self.temp_dir, "temp_recording.wav")
8781
wf = wave.open(temp_path, 'wb')
8882
wf.setnchannels(self.CHANNELS)
@@ -91,7 +85,6 @@ async def record_audio(self) -> bytes:
9185
wf.writeframes(b''.join(frames))
9286
wf.close()
9387

94-
# Read the file as bytes
9588
with open(temp_path, 'rb') as audio_file:
9689
audio_bytes = audio_file.read()
9790

@@ -104,23 +97,18 @@ async def record_audio(self) -> bytes:
10497

10598
async def transcribe_audio(self, audio_bytes: bytes) -> str:
10699
"""Convert speech to text using OpenAI Whisper"""
107-
print("🔊 Transcribing audio...")
108100
try:
109101
response = self.openai_client.audio.transcriptions.create(
110102
model="whisper-1",
111103
file=("audio.wav", audio_bytes),
112104
)
113-
print("✅ Transcription complete!")
114-
print(response.text + "\n")
115105
return response.text
116106
except Exception as e:
117107
raise VoiceAssistantError(f"Transcription failed: {str(e)}")
118108

119109
async def get_chat_response(self, text: str) -> str:
120110
"""Get response from ChatGPT"""
121-
print("💬 Getting chat response...")
122111
try:
123-
# Add user message to conversation history
124112
self.conversation_history.append({"role": "user", "content": text})
125113

126114
response = self.openai_client.chat.completions.create(
@@ -130,10 +118,6 @@ async def get_chat_response(self, text: str) -> str:
130118
)
131119

132120
assistant_response = response.choices[0].message.content
133-
print("✅ Chat response complete!")
134-
print(assistant_response + "\n")
135-
136-
# Add assistant response to conversation history
137121
self.conversation_history.append({"role": "assistant", "content": assistant_response})
138122

139123
return assistant_response
@@ -154,7 +138,6 @@ async def synthesize_speech(self, text: str) -> str:
154138
result = synthesizer.speak_text_async(text).get()
155139

156140
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
157-
print("✅ Speech synthesis complete!")
158141
return output_path
159142
else:
160143
raise VoiceAssistantError("Speech synthesis failed")
@@ -165,17 +148,11 @@ async def synthesize_speech(self, text: str) -> str:
165148
async def process_voice_input(self, audio_data: bytes = None) -> tuple[str, str]:
166149
"""Process voice input and return response text and audio file path"""
167150
try:
168-
# Record audio if not provided
169151
if audio_data is None:
170152
audio_data = await self.record_audio()
171153

172-
# Convert speech to text
173154
transcript = await self.transcribe_audio(audio_data)
174-
175-
# Get ChatGPT response
176155
response_text = await self.get_chat_response(transcript)
177-
178-
# Convert response to speech
179156
audio_path = await self.synthesize_speech(response_text)
180157

181158
return response_text, audio_path
@@ -191,7 +168,6 @@ def cleanup(self):
191168
except Exception:
192169
pass
193170

194-
# API Models
195171
class ChatResponse(BaseModel):
196172
text: str
197173
audio_path: str
@@ -209,19 +185,15 @@ async def chat_endpoint(audio_file: UploadFile = File(None)):
209185
try:
210186
audio_data = None
211187
if audio_file:
212-
# Read uploaded file
213188
audio_data = await audio_file.read()
214189

215190
response_text, audio_path = await assistant.process_voice_input(audio_data)
216191

217-
# Read the audio file into memory before cleanup
218192
with open(audio_path, 'rb') as f:
219193
audio_content = f.read()
220194

221-
# Clean up files
222195
assistant.cleanup()
223196

224-
# Create a new temporary file for the response
225197
temp_response_path = tempfile.mktemp(suffix='.wav')
226198
with open(temp_response_path, 'wb') as f:
227199
f.write(audio_content)

test.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,45 +12,35 @@ def record_audio(output_filename="test.wav", record_seconds=5):
1212
time.sleep(3)
1313
print("🎤 Recording...")
1414

15-
# Record audio
1615
recording = sd.rec(
1716
int(record_seconds * sample_rate),
1817
samplerate=sample_rate,
1918
channels=channels,
2019
dtype=np.int16
2120
)
2221

23-
# Show progress bar
2422
for i in range(record_seconds):
2523
progress = (i + 1) / record_seconds
2624
print(f"\rProgress: [{'=' * int(50 * progress)}{' ' * (50 - int(50 * progress))}] {int(progress * 100)}%", end='')
2725
time.sleep(1)
2826

29-
sd.wait() # Wait until recording is finished
27+
sd.wait()
3028
print("\n✅ Finished recording!")
3129

32-
# Save as WAV
3330
wav.write(output_filename, sample_rate, recording)
3431
print(f"✅ Audio saved as {output_filename}")
3532

3633
return output_filename
3734

3835
def play_audio(filename):
3936
print(f"🔊 Playing {filename}...")
40-
# Read the WAV file
4137
sample_rate, data = wav.read(filename)
42-
43-
# Play the audio
4438
sd.play(data, sample_rate)
45-
# Wait until the audio is finished
4639
sd.wait()
4740
print("✅ Playback complete!")
4841

4942
if __name__ == "__main__":
50-
# Record 5 seconds of audio and save as test.wav
5143
recorded_file = record_audio("test.wav", record_seconds=5)
52-
53-
# Ask if user wants to play it back
5444
response = input("\nWould you like to play back the recording? (y/n): ")
5545
if response.lower() == 'y':
5646
play_audio(recorded_file)

0 commit comments

Comments (0)