Speech multi-channel GA (GoogleCloudPlatform#1341)

nirupa-kumar · nnegrey · commit 944b26fc49e9 · 2019-02-19T14:37:07.000-08:00
diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
@@ -64,7 +64,7 @@ public static void main(String... args) throws Exception {
               + "Commands:\n"
               + "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
               + "\t| wordoffsets | auto-punctuation | stream-punctuation \n"
-              + "\t| enhanced-model | model-selection\n"
+              + "\t| enhanced-model | model-selection | multi-channel\n"
               + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
               + "for a Cloud Storage resource (gs://...)\n",
           Recognize.class.getCanonicalName());
@@ -112,6 +112,12 @@ public static void main(String... args) throws Exception {
       } else {
         transcribeModelSelection(path);
       }
+    } else if (command.equals("multi-channel")) {
+      if (path.startsWith("gs://")) {
+        transcribeMultiChannelGcs(path);
+      } else {
+        transcribeMultiChannel(path);
+      }
     }
   }
 
@@ -830,4 +836,90 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
     }
   }
   // [END speech_transcribe_model_selection_gcs]
+
+  // [START speech_transcribe_multichannel]
+  /**
+   * Transcribes a local two-channel audio file, printing each result's transcript and channel tag.
+   *
+   * @param fileName the path to the local audio file; its bytes are read fully into memory
+   */
+  public static void transcribeMultiChannel(String fileName) throws Exception {
+    Path path = Paths.get(fileName);
+    byte[] content = Files.readAllBytes(path);
+
+    try (SpeechClient speechClient = SpeechClient.create()) {
+      // Wrap the bytes read above as the inline audio content of the request
+      RecognitionAudio recognitionAudio =
+          RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+
+      // Hard-coded for LINEAR16, en-US, 44100 Hz, 2-channel input, recognized per channel
+      RecognitionConfig config =
+          RecognitionConfig.newBuilder()
+              .setEncoding(AudioEncoding.LINEAR16)
+              .setLanguageCode("en-US")
+              .setSampleRateHertz(44100)
+              .setAudioChannelCount(2)
+              .setEnableSeparateRecognitionPerChannel(true)
+              .build();
+
+      // Send the request; recognize(...) hands back the response directly
+      RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+      // Print the top transcript and the channel tag of every result
+      for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+        // There can be several alternative transcripts for a given chunk of speech. Just use the
+        // first (most likely) one here.
+        SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+        System.out.format("Transcript : %s\n", alternative.getTranscript());
+        System.out.printf("Channel Tag : %s\n", result.getChannelTag());
+      }
+    }
+  }
+  // [END speech_transcribe_multichannel]
+
+  // [START speech_transcribe_multichannel_gcs]
+  /**
+   * Transcribes a two-channel audio file in Cloud Storage using a long-running operation.
+   *
+   * @param gcsUri the Cloud Storage URI (gs://...) of the audio file
+   */
+  public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {
+
+    try (SpeechClient speechClient = SpeechClient.create()) {
+
+      // Hard-coded for LINEAR16, en-US, 44100 Hz, 2-channel input, recognized per channel
+      RecognitionConfig config =
+          RecognitionConfig.newBuilder()
+              .setEncoding(AudioEncoding.LINEAR16)
+              .setLanguageCode("en-US")
+              .setSampleRateHertz(44100)
+              .setAudioChannelCount(2)
+              .setEnableSeparateRecognitionPerChannel(true)
+              .build();
+
+      // Reference the audio by URI instead of embedding its bytes in the request
+      RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+      // Start the long-running recognition; the returned future is polled below
+      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+          speechClient.longRunningRecognizeAsync(config, audio);
+
+      while (!response.isDone()) {
+        System.out.println("Waiting for response...");
+        Thread.sleep(10000); // re-check the operation every 10 seconds
+      }
+      // Print every result (not just the first) once the operation has finished.
+      for (SpeechRecognitionResult result : response.get().getResultsList()) {
+
+        // There can be several alternative transcripts for a given chunk of speech. Just use the
+        // first (most likely) one here.
+        SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+        // Print the transcript and the result's channel tag
+        System.out.printf("Transcript : %s\n", alternative.getTranscript());
+        System.out.printf("Channel Tag : %s\n", result.getChannelTag());
+      }
+    }
+  }
+  // [END speech_transcribe_multichannel_gcs]
 }
diff --git a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
@@ -37,7 +37,10 @@ public class RecognizeIT {
 
   // The path to the audio file to transcribe
   private String audioFileName = "./resources/audio.raw";
+  private String multiChannelAudioFileName = "./resources/commercial_stereo.wav";
   private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac";
+  private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav";
+
   private String recognitionAudioFile = "./resources/commercial_mono.wav";
 
   // The path to the video file to transcribe
@@ -150,4 +153,18 @@ public void testGcsModelSelection() throws Exception {
     assertThat(got).contains("OK Google");
     assertThat(got).contains("the weather outside is sunny");
   }
+
+  @Test
+  public void testTranscribeMultiChannel() throws Exception {
+    Recognize.transcribeMultiChannel(multiChannelAudioFileName);
+    String got = bout.toString(); // NOTE(review): bout presumably holds captured stdout - confirm
+    assertThat(got).contains("Channel Tag : 1"); // only the channel-1 tag is asserted
+  }
+
+  @Test
+  public void testTranscribeMultiChannelGcs() throws Exception {
+    Recognize.transcribeMultiChannelGcs(gcsMultiChannelAudioPath);
+    String got = bout.toString(); // NOTE(review): bout presumably holds captured stdout - confirm
+    assertThat(got).contains("Channel Tag : 1"); // only the channel-1 tag is asserted
+  }
 }