Skip to content

Commit 944b26f

Browse files
nirupa-kumar and nnegrey
authored and committed
Speech multi-channel GA (GoogleCloudPlatform#1341)
1 parent 75a52b2 commit 944b26f

File tree

2 files changed

+110
-1
lines changed

2 files changed

+110
-1
lines changed

speech/cloud-client/src/main/java/com/example/speech/Recognize.java

+93-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public static void main(String... args) throws Exception {
6464
+ "Commands:\n"
6565
+ "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
6666
+ "\t| wordoffsets | auto-punctuation | stream-punctuation \n"
67-
+ "\t| enhanced-model | model-selection\n"
67+
+ "\t| enhanced-model | model-selection | multi-channel\n"
6868
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
6969
+ "for a Cloud Storage resource (gs://...)\n",
7070
Recognize.class.getCanonicalName());
@@ -112,6 +112,12 @@ public static void main(String... args) throws Exception {
112112
} else {
113113
transcribeModelSelection(path);
114114
}
115+
} else if (command.equals("multi-channel")) {
116+
if (path.startsWith("gs://")) {
117+
transcribeMultiChannelGcs(path);
118+
} else {
119+
transcribeMultiChannel(path);
120+
}
115121
}
116122
}
117123

@@ -830,4 +836,90 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
830836
}
831837
}
832838
// [END speech_transcribe_model_selection_gcs]
839+
840+
// [START speech_transcribe_multichannel]
841+
/**
842+
* Transcribe a local audio file with multi-channel recognition
843+
*
844+
* @param fileName the path to local audio file
845+
*/
846+
public static void transcribeMultiChannel(String fileName) throws Exception {
847+
Path path = Paths.get(fileName);
848+
byte[] content = Files.readAllBytes(path);
849+
850+
try (SpeechClient speechClient = SpeechClient.create()) {
851+
// Get the contents of the local audio file
852+
RecognitionAudio recognitionAudio =
853+
RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
854+
855+
// Configure request to enable multiple channels
856+
RecognitionConfig config =
857+
RecognitionConfig.newBuilder()
858+
.setEncoding(AudioEncoding.LINEAR16)
859+
.setLanguageCode("en-US")
860+
.setSampleRateHertz(44100)
861+
.setAudioChannelCount(2)
862+
.setEnableSeparateRecognitionPerChannel(true)
863+
.build();
864+
865+
// Perform the transcription request
866+
RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
867+
868+
// Print out the results
869+
for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
870+
// There can be several alternative transcripts for a given chunk of speech. Just use the
871+
// first (most likely) one here.
872+
SpeechRecognitionAlternative alternative = result.getAlternatives(0);
873+
System.out.format("Transcript : %s\n", alternative.getTranscript());
874+
System.out.printf("Channel Tag : %s\n", result.getChannelTag());
875+
}
876+
}
877+
}
878+
// [END speech_transcribe_multichannel]
879+
880+
// [START speech_transcribe_multichannel_gcs]
881+
/**
882+
* Transcribe a remote audio file with multi-channel recognition
883+
*
884+
* @param gcsUri the path to the audio file
885+
*/
886+
public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {
887+
888+
try (SpeechClient speechClient = SpeechClient.create()) {
889+
890+
// Configure request to enable multiple channels
891+
RecognitionConfig config =
892+
RecognitionConfig.newBuilder()
893+
.setEncoding(AudioEncoding.LINEAR16)
894+
.setLanguageCode("en-US")
895+
.setSampleRateHertz(44100)
896+
.setAudioChannelCount(2)
897+
.setEnableSeparateRecognitionPerChannel(true)
898+
.build();
899+
900+
// Set the remote path for the audio file
901+
RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
902+
903+
// Use non-blocking call for getting file transcription
904+
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
905+
speechClient.longRunningRecognizeAsync(config, audio);
906+
907+
while (!response.isDone()) {
908+
System.out.println("Waiting for response...");
909+
Thread.sleep(10000);
910+
}
911+
// Just print the first result here.
912+
for (SpeechRecognitionResult result : response.get().getResultsList()) {
913+
914+
// There can be several alternative transcripts for a given chunk of speech. Just use the
915+
// first (most likely) one here.
916+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
917+
918+
// Print out the result
919+
System.out.printf("Transcript : %s\n", alternative.getTranscript());
920+
System.out.printf("Channel Tag : %s\n", result.getChannelTag());
921+
}
922+
}
923+
}
924+
// [END speech_transcribe_multichannel_gcs]
833925
}

speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java

+17
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@ public class RecognizeIT {
3737

3838
// The path to the audio file to transcribe
3939
private String audioFileName = "./resources/audio.raw";
40+
private String multiChannelAudioFileName = "./resources/commercial_stereo.wav";
4041
private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac";
42+
private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav";
43+
4144
private String recognitionAudioFile = "./resources/commercial_mono.wav";
4245

4346
// The path to the video file to transcribe
@@ -150,4 +153,18 @@ public void testGcsModelSelection() throws Exception {
150153
assertThat(got).contains("OK Google");
151154
assertThat(got).contains("the weather outside is sunny");
152155
}
156+
157+
@Test
158+
public void testTranscribeMultiChannel() throws Exception {
159+
Recognize.transcribeMultiChannel(multiChannelAudioFileName);
160+
String got = bout.toString();
161+
assertThat(got).contains("Channel Tag : 1");
162+
}
163+
164+
@Test
165+
public void testTranscribeMultiChannelGcs() throws Exception {
166+
Recognize.transcribeMultiChannelGcs(gcsMultiChannelAudioPath);
167+
String got = bout.toString();
168+
assertThat(got).contains("Channel Tag : 1");
169+
}
153170
}

0 commit comments

Comments
 (0)