From 6219d3ec122eddb3968f20d05177c54c9db8f8ec Mon Sep 17 00:00:00 2001 From: Tianzi Cai Date: Fri, 16 Nov 2018 15:12:24 -0800 Subject: [PATCH 1/4] feat: video speech transcription --- .../main/java/com/example/video/Detect.java | 77 ++++++++++++++++++- .../test/java/com/example/video/DetectIT.java | 10 +++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/video/cloud-client/src/main/java/com/example/video/Detect.java b/video/cloud-client/src/main/java/com/example/video/Detect.java index e31f4414998..cac57bc5732 100644 --- a/video/cloud-client/src/main/java/com/example/video/Detect.java +++ b/video/cloud-client/src/main/java/com/example/video/Detect.java @@ -25,14 +25,21 @@ import com.google.cloud.videointelligence.v1.Feature; import com.google.cloud.videointelligence.v1.LabelAnnotation; import com.google.cloud.videointelligence.v1.LabelSegment; +import com.google.cloud.videointelligence.v1.SpeechRecognitionAlternative; +import com.google.cloud.videointelligence.v1.SpeechTranscription; +import com.google.cloud.videointelligence.v1.SpeechTranscriptionConfig; import com.google.cloud.videointelligence.v1.VideoAnnotationResults; +import com.google.cloud.videointelligence.v1.VideoContext; import com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient; import com.google.cloud.videointelligence.v1.VideoSegment; +import com.google.cloud.videointelligence.v1.WordInfo; import com.google.protobuf.ByteString; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + import org.apache.commons.codec.binary.Base64; @@ -83,6 +90,9 @@ public static void argsHelper(String[] args) throws Exception { if (command.equals("explicit-content")) { analyzeExplicitContent(path); } + if (command.equals("speech-transcription")) { + speechTranscription(path); + } } /** @@ -322,4 +332,69 @@ public static void analyzeExplicitContent(String gcsUri) throws Exception { // [END video_analyze_explicit_content] } } -} + + /** + * Transcribe speech from a video stored on GCS. + * + * @param gcsUri the path to the video file to analyze. + */ + public static void speechTranscription(String gcsUri) throws Exception { + // [START video_speech_transcription_gcs] + // Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient + try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) { + // Set the language code + SpeechTranscriptionConfig config = SpeechTranscriptionConfig.newBuilder() + .setLanguageCode("en-US") + .setEnableAutomaticPunctuation(true) + .build(); + + // Set the video context with the above configuration + VideoContext context = VideoContext.newBuilder() + .setSpeechTranscriptionConfig(config) + .build(); + + // Create the request + AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder() + .setInputUri(gcsUri) + .addFeatures(com.google.cloud.videointelligence.v1.Feature.SPEECH_TRANSCRIPTION) + .setVideoContext(context) + .build(); + + // asynchronously perform speech transcription on videos + OperationFuture response = + client.annotateVideoAsync(request); + + System.out.println("Waiting for operation to complete..."); + // Display the results + for (VideoAnnotationResults results : response.get(600, TimeUnit.SECONDS) + .getAnnotationResultsList()) { + for (SpeechTranscription speechTranscription : results.getSpeechTranscriptionsList()) { + try { + // Print the transcription + if (speechTranscription.getAlternativesCount() > 0) { + SpeechRecognitionAlternative alternative = speechTranscription.getAlternatives(0); + + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + System.out.printf("Confidence: %.2f\n", alternative.getConfidence()); + + System.out.println("Word level information:"); + for (WordInfo wordInfo : alternative.getWordsList()) { + double startTime = wordInfo.getStartTime().getSeconds() + + wordInfo.getStartTime().getNanos() / 1e9; + double endTime = wordInfo.getEndTime().getSeconds() + + wordInfo.getEndTime().getNanos() / 1e9; + System.out.printf("\t%4.2fs - %4.2fs: %s\n", + startTime, endTime, wordInfo.getWord()); + } + } else { + System.out.println("No transcription found"); + } + } catch (IndexOutOfBoundsException ioe) { + System.out.println("Could not retrieve frame: " + ioe.getMessage()); + } + } + } + // [END video_speech_transcription_gcs] + } + } +} \ No newline at end of file diff --git a/video/cloud-client/src/test/java/com/example/video/DetectIT.java b/video/cloud-client/src/test/java/com/example/video/DetectIT.java index fcc7a50cb85..cd75e6a887f 100644 --- a/video/cloud-client/src/test/java/com/example/video/DetectIT.java +++ b/video/cloud-client/src/test/java/com/example/video/DetectIT.java @@ -37,6 +37,7 @@ public class DetectIT { static final String LABEL_FILE_LOCATION = "./resources/cat.mp4"; static final String SHOTS_FILE_LOCATION = "gs://demomaker/gbikes_dinosaur.mp4"; static final String EXPLICIT_CONTENT_LOCATION = "gs://demomaker/cat.mp4"; + static final String SPEECH_GCS_LOCATION = "gs://python-docs-samples-tests/video/googlework_short.mp4"; @Before public void setUp() { @@ -84,4 +85,13 @@ public void testShots() throws Exception { assertThat(got).contains("Shots:"); assertThat(got).contains("Location: 0"); } + + @Test + public void testSpeechTranscription() throws Exception { + String[] args = {"speech-transcription", SPEECH_GCS_LOCATION}; + Detect.argsHelper(args); + String got = bout.toString(); + + assertThat(got).contains("cultural"); + } } From 833eaa3ebd50b3a1020099742034cd6543b60276 Mon Sep 17 00:00:00 2001 From: Tianzi Cai Date: Fri, 16 Nov 2018 15:15:44 -0800 Subject: [PATCH 2/4] Speech transcription command in README --- video/cloud-client/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/video/cloud-client/README.md b/video/cloud-client/README.md index 21d95360769..3565429ce1f 100644 --- a/video/cloud-client/README.md +++ b/video/cloud-client/README.md @@ -53,6 +53,11 @@ Detect Shots mvn exec:java -DDetect -Dexec.args="shots gs://demomaker/gbikes_dinosaur.mp4" ``` +Transcribe Speech +``` +mvn exec:java -DDetect -Dexec.args="speech-transcription gs://python-docs-samples-tests/video/googlework_short.mp4" +``` + From Windows, you may need to supply your classpath differently, for example: ``` mvn exec:java -DDetect -Dexec.args="labels gs://demomaker/cat.mp4" From d843baf7fd75595540df20a787f6d374ddeae1db Mon Sep 17 00:00:00 2001 From: Tianzi Cai Date: Fri, 16 Nov 2018 15:32:18 -0800 Subject: [PATCH 3/4] Nit fixes --- .../cloud-client/src/main/java/com/example/video/Detect.java | 4 ++-- .../src/test/java/com/example/video/DetectIT.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/video/cloud-client/src/main/java/com/example/video/Detect.java b/video/cloud-client/src/main/java/com/example/video/Detect.java index cac57bc5732..ee5dad37194 100644 --- a/video/cloud-client/src/main/java/com/example/video/Detect.java +++ b/video/cloud-client/src/main/java/com/example/video/Detect.java @@ -356,7 +356,7 @@ public static void speechTranscription(String gcsUri) throws Exception { // Create the request AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder() .setInputUri(gcsUri) - .addFeatures(com.google.cloud.videointelligence.v1.Feature.SPEECH_TRANSCRIPTION) + .addFeatures(Feature.SPEECH_TRANSCRIPTION) .setVideoContext(context) .build(); @@ -394,7 +394,7 @@ public static void speechTranscription(String gcsUri) throws Exception { } } } - // [END video_speech_transcription_gcs] } + // [END video_speech_transcription_gcs] } } \ No newline at end of file diff --git a/video/cloud-client/src/test/java/com/example/video/DetectIT.java b/video/cloud-client/src/test/java/com/example/video/DetectIT.java index cd75e6a887f..8651ef7d5a4 100644 --- a/video/cloud-client/src/test/java/com/example/video/DetectIT.java +++ b/video/cloud-client/src/test/java/com/example/video/DetectIT.java @@ -37,7 +37,7 @@ public class DetectIT { static final String LABEL_FILE_LOCATION = "./resources/cat.mp4"; static final String SHOTS_FILE_LOCATION = "gs://demomaker/gbikes_dinosaur.mp4"; static final String EXPLICIT_CONTENT_LOCATION = "gs://demomaker/cat.mp4"; - static final String SPEECH_GCS_LOCATION = "gs://python-docs-samples-tests/video/googlework_short.mp4"; + static final String SPEECH_GCS_LOCATION = "gs://java-docs-samples-testing/video/googlework_short.mp4"; @Before public void setUp() { From f6f59e97749164c8f77a2afb4f8870f6864984a2 Mon Sep 17 00:00:00 2001 From: Tianzi Cai Date: Fri, 16 Nov 2018 16:00:30 -0800 Subject: [PATCH 4/4] Reformat --- .../cloud-client/src/test/java/com/example/video/DetectIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/video/cloud-client/src/test/java/com/example/video/DetectIT.java b/video/cloud-client/src/test/java/com/example/video/DetectIT.java index 8651ef7d5a4..1404ec66888 100644 --- a/video/cloud-client/src/test/java/com/example/video/DetectIT.java +++ b/video/cloud-client/src/test/java/com/example/video/DetectIT.java @@ -37,7 +37,8 @@ public class DetectIT { static final String LABEL_FILE_LOCATION = "./resources/cat.mp4"; static final String SHOTS_FILE_LOCATION = "gs://demomaker/gbikes_dinosaur.mp4"; static final String EXPLICIT_CONTENT_LOCATION = "gs://demomaker/cat.mp4"; - static final String SPEECH_GCS_LOCATION = "gs://java-docs-samples-testing/video/googlework_short.mp4"; + static final String SPEECH_GCS_LOCATION = + "gs://java-docs-samples-testing/video/googlework_short.mp4"; @Before public void setUp() {