diff --git a/video/beta/pom.xml b/video/beta/pom.xml index 909e1805e04..9e4b7e2cf4d 100644 --- a/video/beta/pom.xml +++ b/video/beta/pom.xml @@ -26,7 +26,7 @@ com.google.cloud.samples shared-configuration - 1.0.9 + 1.0.10 @@ -39,7 +39,7 @@ com.google.cloud google-cloud-video-intelligence - 0.56.0-beta + 0.67.0-beta diff --git a/video/beta/resources/cat.mp4 b/video/beta/resources/cat.mp4 new file mode 100644 index 00000000000..0e071b9ec67 Binary files /dev/null and b/video/beta/resources/cat.mp4 differ diff --git a/video/beta/resources/googlework_short.mp4 b/video/beta/resources/googlework_short.mp4 new file mode 100644 index 00000000000..be0f40f8ad6 Binary files /dev/null and b/video/beta/resources/googlework_short.mp4 differ diff --git a/video/beta/src/main/java/com/example/video/Detect.java b/video/beta/src/main/java/com/example/video/Detect.java index c3c003c1984..500c6405f43 100644 --- a/video/beta/src/main/java/com/example/video/Detect.java +++ b/video/beta/src/main/java/com/example/video/Detect.java @@ -105,7 +105,7 @@ public static void speechTranscription(String gcsUri) throws Exception { System.out.println("Waiting for operation to complete..."); // Display the results - for (VideoAnnotationResults results : response.get(180, TimeUnit.SECONDS) + for (VideoAnnotationResults results : response.get(300, TimeUnit.SECONDS) .getAnnotationResultsList()) { for (SpeechTranscription speechTranscription : results.getSpeechTranscriptionsList()) { try { diff --git a/video/beta/src/main/java/com/example/video/TextDetection.java b/video/beta/src/main/java/com/example/video/TextDetection.java new file mode 100644 index 00000000000..51e9ccd6184 --- /dev/null +++ b/video/beta/src/main/java/com/example/video/TextDetection.java @@ -0,0 +1,165 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.video; + +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoProgress; +import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoRequest; +import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoResponse; +import com.google.cloud.videointelligence.v1p2beta1.Feature; +import com.google.cloud.videointelligence.v1p2beta1.NormalizedVertex; +import com.google.cloud.videointelligence.v1p2beta1.TextAnnotation; +import com.google.cloud.videointelligence.v1p2beta1.TextFrame; +import com.google.cloud.videointelligence.v1p2beta1.TextSegment; +import com.google.cloud.videointelligence.v1p2beta1.VideoAnnotationResults; +import com.google.cloud.videointelligence.v1p2beta1.VideoIntelligenceServiceClient; +import com.google.cloud.videointelligence.v1p2beta1.VideoSegment; +import com.google.protobuf.ByteString; + +import com.google.protobuf.Duration; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.concurrent.TimeUnit; + +public class TextDetection { + + // [START video_detect_text_beta] + /** + * Detect text in a video. + * + * @param filePath the path to the video file to analyze. 
+ */ + public static VideoAnnotationResults detectText(String filePath) throws Exception { + try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) { + // Read file + Path path = Paths.get(filePath); + byte[] data = Files.readAllBytes(path); + + // Create the request + AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder() + .setInputContent(ByteString.copyFrom(data)) + .addFeatures(Feature.TEXT_DETECTION) + .build(); + + // asynchronously perform text detection on the video + OperationFuture future = + client.annotateVideoAsync(request); + + System.out.println("Waiting for operation to complete..."); + // The first result is retrieved because a single video was processed. + AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS); + VideoAnnotationResults results = response.getAnnotationResults(0); + + // Get only the first annotation for demo purposes. + TextAnnotation annotation = results.getTextAnnotations(0); + System.out.println("Text: " + annotation.getText()); + + // Get the first text segment. + TextSegment textSegment = annotation.getSegments(0); + System.out.println("Confidence: " + textSegment.getConfidence()); + // For the text segment display its time offset + VideoSegment videoSegment = textSegment.getSegment(); + Duration startTimeOffset = videoSegment.getStartTimeOffset(); + Duration endTimeOffset = videoSegment.getEndTimeOffset(); + // Display the offset times in seconds, 1e9 is part of the formula to convert nanos to seconds + System.out.println(String.format("Start time: %.2f", + startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9)); + System.out.println(String.format("End time: %.2f", + endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9)); + + // Show the first result for the first frame in the segment.
+ TextFrame textFrame = textSegment.getFrames(0); + Duration timeOffset = textFrame.getTimeOffset(); + System.out.println(String.format("Time offset for the first frame: %.2f", + timeOffset.getSeconds() + timeOffset.getNanos() / 1e9)); + + // Display the rotated bounding box for where the text is on the frame. + System.out.println("Rotated Bounding Box Vertices:"); + List vertices = textFrame.getRotatedBoundingBox().getVerticesList(); + for (NormalizedVertex normalizedVertex : vertices) { + System.out.println(String.format( + "\tVertex.x: %.2f, Vertex.y: %.2f", + normalizedVertex.getX(), + normalizedVertex.getY())); + } + return results; + } + } + // [END video_detect_text_beta] + + // [START video_detect_text_gcs_beta] + /** + * Detect Text in a video. + * + * @param gcsUri the path to the video file to analyze. + */ + public static VideoAnnotationResults detectTextGcs(String gcsUri) throws Exception { + try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) { + // Create the request + AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder() + .setInputUri(gcsUri) + .addFeatures(Feature.TEXT_DETECTION) + .build(); + + // asynchronously perform text detection on the video + OperationFuture future = + client.annotateVideoAsync(request); + + System.out.println("Waiting for operation to complete..."); + // The first result is retrieved because a single video was processed. + AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS); + VideoAnnotationResults results = response.getAnnotationResults(0); + + // Get only the first annotation for demo purposes. + TextAnnotation annotation = results.getTextAnnotations(0); + System.out.println("Text: " + annotation.getText()); + + // Get the first text segment.
+ TextSegment textSegment = annotation.getSegments(0); + System.out.println("Confidence: " + textSegment.getConfidence()); + // For the text segment display its time offset + VideoSegment videoSegment = textSegment.getSegment(); + Duration startTimeOffset = videoSegment.getStartTimeOffset(); + Duration endTimeOffset = videoSegment.getEndTimeOffset(); + // Display the offset times in seconds, 1e9 is part of the formula to convert nanos to seconds + System.out.println(String.format("Start time: %.2f", + startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9)); + System.out.println(String.format("End time: %.2f", + endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9)); + + // Show the first result for the first frame in the segment. + TextFrame textFrame = textSegment.getFrames(0); + Duration timeOffset = textFrame.getTimeOffset(); + System.out.println(String.format("Time offset for the first frame: %.2f", + timeOffset.getSeconds() + timeOffset.getNanos() / 1e9)); + + // Display the rotated bounding box for where the text is on the frame. + System.out.println("Rotated Bounding Box Vertices:"); + List vertices = textFrame.getRotatedBoundingBox().getVerticesList(); + for (NormalizedVertex normalizedVertex : vertices) { + System.out.println(String.format( + "\tVertex.x: %.2f, Vertex.y: %.2f", + normalizedVertex.getX(), + normalizedVertex.getY())); + } + return results; + } + } + // [END video_detect_text_gcs_beta] +} diff --git a/video/beta/src/main/java/com/example/video/TrackObjects.java b/video/beta/src/main/java/com/example/video/TrackObjects.java new file mode 100644 index 00000000000..3a10fb89e1a --- /dev/null +++ b/video/beta/src/main/java/com/example/video/TrackObjects.java @@ -0,0 +1,175 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.video; + +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoProgress; +import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoRequest; +import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoResponse; +import com.google.cloud.videointelligence.v1p2beta1.Entity; +import com.google.cloud.videointelligence.v1p2beta1.Feature; +import com.google.cloud.videointelligence.v1p2beta1.NormalizedBoundingBox; +import com.google.cloud.videointelligence.v1p2beta1.ObjectTrackingAnnotation; +import com.google.cloud.videointelligence.v1p2beta1.ObjectTrackingFrame; +import com.google.cloud.videointelligence.v1p2beta1.VideoAnnotationResults; +import com.google.cloud.videointelligence.v1p2beta1.VideoIntelligenceServiceClient; +import com.google.cloud.videointelligence.v1p2beta1.VideoSegment; +import com.google.protobuf.ByteString; + +import com.google.protobuf.Duration; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; + +public class TrackObjects { + + // [START video_object_tracking_beta] + /** + * Track objects in a video. + * + * @param filePath the path to the video file to analyze. 
+ */ + public static VideoAnnotationResults trackObjects(String filePath) throws Exception { + try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) { + // Read file + Path path = Paths.get(filePath); + byte[] data = Files.readAllBytes(path); + + // Create the request + AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder() + .setInputContent(ByteString.copyFrom(data)) + .addFeatures(Feature.OBJECT_TRACKING) + .setLocationId("us-east1") + .build(); + + // asynchronously perform object tracking on videos + OperationFuture future = + client.annotateVideoAsync(request); + + System.out.println("Waiting for operation to complete..."); + // The first result is retrieved because a single video was processed. + AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS); + VideoAnnotationResults results = response.getAnnotationResults(0); + + // Get only the first annotation for demo purposes. + ObjectTrackingAnnotation annotation = results.getObjectAnnotations(0); + System.out.println("Confidence: " + annotation.getConfidence()); + + if (annotation.hasEntity()) { + Entity entity = annotation.getEntity(); + System.out.println("Entity description: " + entity.getDescription()); + System.out.println("Entity id:: " + entity.getEntityId()); + } + + if (annotation.hasSegment()) { + VideoSegment videoSegment = annotation.getSegment(); + Duration startTimeOffset = videoSegment.getStartTimeOffset(); + Duration endTimeOffset = videoSegment.getEndTimeOffset(); + // Display the segment time in seconds, 1e9 converts nanos to seconds + System.out.println(String.format( + "Segment: %.2fs to %.2fs", + startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9, + endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9)); + } + + // Here we print only the bounding box of the first frame in this segment. 
+ ObjectTrackingFrame frame = annotation.getFrames(0); + // Display the offset time in seconds, 1e9 converts nanos to seconds + Duration timeOffset = frame.getTimeOffset(); + System.out.println(String.format( + "Time offset of the first frame: %.2fs", + timeOffset.getSeconds() + timeOffset.getNanos() / 1e9)); + + // Display the bounding box of the detected object + NormalizedBoundingBox normalizedBoundingBox = frame.getNormalizedBoundingBox(); + System.out.println("Bounding box position:"); + System.out.println("\tleft: " + normalizedBoundingBox.getLeft()); + System.out.println("\ttop: " + normalizedBoundingBox.getTop()); + System.out.println("\tright: " + normalizedBoundingBox.getRight()); + System.out.println("\tbottom: " + normalizedBoundingBox.getBottom()); + return results; + } + } + // [END video_object_tracking_beta] + + // [START video_object_tracking_gcs_beta] + /** + * Track objects in a video. + * + * @param gcsUri the path to the video file to analyze. + */ + public static VideoAnnotationResults trackObjectsGcs(String gcsUri) throws Exception { + try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) { + // Create the request + AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder() + .setInputUri(gcsUri) + .addFeatures(Feature.OBJECT_TRACKING) + .setLocationId("us-east1") + .build(); + + // asynchronously perform object tracking on videos + OperationFuture future = + client.annotateVideoAsync(request); + + System.out.println("Waiting for operation to complete..."); + // The first result is retrieved because a single video was processed. + AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS); + VideoAnnotationResults results = response.getAnnotationResults(0); + + // Get only the first annotation for demo purposes. 
+ ObjectTrackingAnnotation annotation = results.getObjectAnnotations(0); + System.out.println("Confidence: " + annotation.getConfidence()); + + if (annotation.hasEntity()) { + Entity entity = annotation.getEntity(); + System.out.println("Entity description: " + entity.getDescription()); + System.out.println("Entity id:: " + entity.getEntityId()); + } + + if (annotation.hasSegment()) { + VideoSegment videoSegment = annotation.getSegment(); + Duration startTimeOffset = videoSegment.getStartTimeOffset(); + Duration endTimeOffset = videoSegment.getEndTimeOffset(); + // Display the segment time in seconds, 1e9 converts nanos to seconds + System.out.println(String.format( + "Segment: %.2fs to %.2fs", + startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9, + endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9)); + } + + // Here we print only the bounding box of the first frame in this segment. + ObjectTrackingFrame frame = annotation.getFrames(0); + // Display the offset time in seconds, 1e9 converts nanos to seconds + Duration timeOffset = frame.getTimeOffset(); + System.out.println(String.format( + "Time offset of the first frame: %.2fs", + timeOffset.getSeconds() + timeOffset.getNanos() / 1e9)); + + // Display the bounding box of the detected object + NormalizedBoundingBox normalizedBoundingBox = frame.getNormalizedBoundingBox(); + System.out.println("Bounding box position:"); + System.out.println("\tleft: " + normalizedBoundingBox.getLeft()); + System.out.println("\ttop: " + normalizedBoundingBox.getTop()); + System.out.println("\tright: " + normalizedBoundingBox.getRight()); + System.out.println("\tbottom: " + normalizedBoundingBox.getBottom()); + return results; + } + } + // [END video_object_tracking_gcs_beta] +} + diff --git a/video/beta/src/test/java/com/example/video/DetectIT.java b/video/beta/src/test/java/com/example/video/DetectIT.java index e4de93f34b0..9e81648cadc 100644 --- a/video/beta/src/test/java/com/example/video/DetectIT.java +++ 
b/video/beta/src/test/java/com/example/video/DetectIT.java @@ -18,8 +18,13 @@ import static com.google.common.truth.Truth.assertThat; +import com.google.cloud.videointelligence.v1p2beta1.ObjectTrackingAnnotation; +import com.google.cloud.videointelligence.v1p2beta1.TextAnnotation; +import com.google.cloud.videointelligence.v1p2beta1.VideoAnnotationResults; import java.io.ByteArrayOutputStream; import java.io.PrintStream; +import java.util.Arrays; +import java.util.List; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -37,6 +42,10 @@ public class DetectIT { static final String FILE_LOCATION = "gs://java-docs-samples-testing/video/googlework_short.mp4"; + private static final List POSSIBLE_TEXTS = Arrays.asList( + "Google", "SUR", "SUR", "ROTO", "Vice President", "58oo9", "LONDRES", "OMAR", "PARIS", + "METRO", "RUE", "CARLO"); + @Before public void setUp() { bout = new ByteArrayOutputStream(); @@ -57,4 +66,68 @@ public void testSpeechTranscription() throws Exception { assertThat(got).contains("cultural"); } + + @Test + public void testTrackObjects() throws Exception { + VideoAnnotationResults result = TrackObjects.trackObjects("resources/cat.mp4"); + + boolean textExists = false; + for (ObjectTrackingAnnotation objectTrackingAnnotation : result.getObjectAnnotationsList()) { + if (objectTrackingAnnotation.getEntity().getDescription().toUpperCase().contains("CAT")) { + textExists = true; + break; + } + } + + assertThat(textExists).isTrue(); + } + + @Test + public void testTrackObjectsGcs() throws Exception { + VideoAnnotationResults result = TrackObjects.trackObjectsGcs("gs://demomaker/cat.mp4"); + + boolean textExists = false; + for (ObjectTrackingAnnotation objectTrackingAnnotation : result.getObjectAnnotationsList()) { + if (objectTrackingAnnotation.getEntity().getDescription().toUpperCase().contains("CAT")) { + textExists = true; + break; + } + } + + assertThat(textExists).isTrue(); + } + + @Test + public void testTextDetection() throws 
Exception { + VideoAnnotationResults result = TextDetection.detectText("resources/googlework_short.mp4"); + + boolean textExists = false; + for (TextAnnotation textAnnotation : result.getTextAnnotationsList()) { + for (String possibleText : POSSIBLE_TEXTS) { + if (textAnnotation.getText().toUpperCase().contains(possibleText.toUpperCase())) { + textExists = true; + break; + } + } + } + + assertThat(textExists).isTrue(); + } + + @Test + public void testTextDetectionGcs() throws Exception { + VideoAnnotationResults result = TextDetection.detectTextGcs(FILE_LOCATION); + + boolean textExists = false; + for (TextAnnotation textAnnotation : result.getTextAnnotationsList()) { + for (String possibleText : POSSIBLE_TEXTS) { + if (textAnnotation.getText().toUpperCase().contains(possibleText.toUpperCase())) { + textExists = true; + break; + } + } + } + + assertThat(textExists).isTrue(); + } }