@@ -3,7 +3,6 @@
 from pydantic import BaseModel
 from typing import Optional
 import azure.cognitiveservices.speech as speechsdk
-import openai
 from openai import OpenAI
 import pyaudio
 import wave
@@ -47,11 +46,7 @@ class VoiceAssistant:
     def __init__(self):
         self.conversation_history = []
         self.temp_dir = tempfile.mkdtemp()
-
-        # Initialize OpenAI client
         self.openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
-
-        # Initialize Azure Speech config
         self.speech_config = speechsdk.SpeechConfig(
             subscription=azure_speech_key,
             region=azure_service_region
@@ -82,7 +77,6 @@ async def record_audio(self) -> bytes:
             data = stream.read(self.CHUNK)
             frames.append(data)

-        # Save to temporary WAV file
         temp_path = os.path.join(self.temp_dir, "temp_recording.wav")
         wf = wave.open(temp_path, 'wb')
         wf.setnchannels(self.CHANNELS)
@@ -91,7 +85,6 @@ async def record_audio(self) -> bytes:
         wf.writeframes(b''.join(frames))
         wf.close()

-        # Read the file as bytes
         with open(temp_path, 'rb') as audio_file:
             audio_bytes = audio_file.read()

@@ -104,23 +97,18 @@ async def record_audio(self) -> bytes:

     async def transcribe_audio(self, audio_bytes: bytes) -> str:
         """Convert speech to text using OpenAI Whisper"""
-        print("🔊 Transcribing audio...")
         try:
             response = self.openai_client.audio.transcriptions.create(
                 model="whisper-1",
                 file=("audio.wav", audio_bytes),
             )
-            print("✅ Transcription complete!")
-            print(response.text + "\n")
             return response.text
         except Exception as e:
             raise VoiceAssistantError(f"Transcription failed: {str(e)}")

     async def get_chat_response(self, text: str) -> str:
         """Get response from ChatGPT"""
-        print("💬 Getting chat response...")
         try:
-            # Add user message to conversation history
             self.conversation_history.append({"role": "user", "content": text})

             response = self.openai_client.chat.completions.create(
@@ -130,10 +118,6 @@ async def get_chat_response(self, text: str) -> str:
             )

             assistant_response = response.choices[0].message.content
-            print("✅ Chat response complete!")
-            print(assistant_response + "\n")
-
-            # Add assistant response to conversation history
             self.conversation_history.append({"role": "assistant", "content": assistant_response})

             return assistant_response
@@ -154,7 +138,6 @@ async def synthesize_speech(self, text: str) -> str:
         result = synthesizer.speak_text_async(text).get()

         if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
-            print("✅ Speech synthesis complete!")
             return output_path
         else:
             raise VoiceAssistantError("Speech synthesis failed")
@@ -165,17 +148,11 @@ async def synthesize_speech(self, text: str) -> str:
     async def process_voice_input(self, audio_data: bytes = None) -> tuple[str, str]:
         """Process voice input and return response text and audio file path"""
         try:
-            # Record audio if not provided
             if audio_data is None:
                 audio_data = await self.record_audio()

-            # Convert speech to text
             transcript = await self.transcribe_audio(audio_data)
-
-            # Get ChatGPT response
             response_text = await self.get_chat_response(transcript)
-
-            # Convert response to speech
             audio_path = await self.synthesize_speech(response_text)

             return response_text, audio_path
@@ -191,7 +168,6 @@ def cleanup(self):
         except Exception:
             pass

-# API Models
 class ChatResponse(BaseModel):
     text: str
     audio_path: str
@@ -209,19 +185,15 @@ async def chat_endpoint(audio_file: UploadFile = File(None)):
     try:
         audio_data = None
         if audio_file:
-            # Read uploaded file
             audio_data = await audio_file.read()

         response_text, audio_path = await assistant.process_voice_input(audio_data)

-        # Read the audio file into memory before cleanup
         with open(audio_path, 'rb') as f:
             audio_content = f.read()

-        # Clean up files
         assistant.cleanup()

-        # Create a new temporary file for the response
         temp_response_path = tempfile.mktemp(suffix='.wav')
         with open(temp_response_path, 'wb') as f:
             f.write(audio_content)