1 /*
2  * Copyright 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package androidx.camera.mlkit.vision;
17 
18 import static androidx.camera.core.ImageAnalysis.COORDINATE_SYSTEM_ORIGINAL;
19 import static androidx.camera.core.ImageAnalysis.COORDINATE_SYSTEM_SENSOR;
20 import static androidx.camera.core.impl.utils.TransformUtils.getRectToRect;
21 import static androidx.camera.core.impl.utils.TransformUtils.rotateRect;
22 
23 import static com.google.android.gms.common.internal.Preconditions.checkArgument;
24 import static com.google.mlkit.vision.interfaces.Detector.TYPE_BARCODE_SCANNING;
25 import static com.google.mlkit.vision.interfaces.Detector.TYPE_SEGMENTATION;
26 import static com.google.mlkit.vision.interfaces.Detector.TYPE_TEXT_RECOGNITION;
27 
28 import android.graphics.Matrix;
29 import android.graphics.RectF;
30 import android.media.Image;
31 import android.util.Size;
32 
33 import androidx.annotation.OptIn;
34 import androidx.camera.core.ExperimentalGetImage;
35 import androidx.camera.core.ImageAnalysis;
36 import androidx.camera.core.ImageProxy;
37 import androidx.camera.core.Logger;
38 import androidx.camera.view.TransformExperimental;
39 import androidx.camera.view.transform.ImageProxyTransformFactory;
40 import androidx.core.util.Consumer;
41 
42 import com.google.android.gms.tasks.Task;
43 import com.google.mlkit.vision.interfaces.Detector;
44 
45 import org.jspecify.annotations.NonNull;
46 import org.jspecify.annotations.Nullable;
47 
48 import java.util.ArrayList;
49 import java.util.HashMap;
50 import java.util.List;
51 import java.util.Map;
52 import java.util.concurrent.CancellationException;
53 import java.util.concurrent.Executor;
54 
55 /**
56  * An implementation of {@link ImageAnalysis.Analyzer} with ML Kit libraries.
57  *
58  * <p> This class is a wrapper of one or many ML Kit {@code Detector}s. It forwards
59  * {@link ImageAnalysis} frames to all the {@code Detector}s sequentially. Once all the
60  * {@code Detector}s finish analyzing the frame, {@link Consumer#accept} will be
61  * invoked with the aggregated analysis results.
62  *
63  * <p> This class handles the coordinate transformation between ML Kit output and the target
64  * coordinate system. Using the {@code targetCoordinateSystem} set in the constructor, it
65  * calculates the {@link Matrix} with the value provided by CameraX via
66  * {@link ImageAnalysis.Analyzer#updateTransform} and forwards it to the ML Kit {@code Detector}.
67  * The coordinates returned by MLKit will be in the specified coordinate system.
68  *
69  * <p> This class is designed to work seamlessly with the {@code CameraController} class in
70  * camera-view. When used with {@link ImageAnalysis} in camera-core, the following scenarios may
71  * need special handling:
72  * <ul>
73  * <li> Cannot transform coordinates to UI coordinate system. e.g. camera-core only supports
74  * {@link ImageAnalysis#COORDINATE_SYSTEM_ORIGINAL}.
75  * <li>For the value of {@link #getDefaultTargetResolution()} to be effective, make sure
76  * the {@link ImageAnalysis#setAnalyzer} is called before it's bound to the lifecycle.
77  * </ul>
78  *
 * Code sample:
 * <pre><code>
 *  cameraController.setImageAnalysisAnalyzer(executor,
 *       new MlKitAnalyzer(List.of(barcodeScanner), COORDINATE_SYSTEM_VIEW_REFERENCED,
 *       executor, result -> {
 *    // The value of result.getValue(barcodeScanner) can be used directly for drawing UI overlay.
 *  }));
 * </code></pre>
87  *
88  * @see ImageAnalysis.Analyzer
89  */
90 public class MlKitAnalyzer implements ImageAnalysis.Analyzer {
91 
92     private static final String TAG = "MlKitAnalyzer";
93 
94     private static final Size DEFAULT_SIZE = new Size(480, 360);
95 
96     private final @NonNull List<Detector<?>> mDetectors;
97     private final int mTargetCoordinateSystem;
98     // Synthetic access
99     final @NonNull Consumer<Result> mConsumer;
100     // Synthetic access
101     final ImageProxyTransformFactory mImageAnalysisTransformFactory;
102     private final @NonNull Executor mExecutor;
103 
104     private @Nullable Matrix mSensorToTarget;
105 
106     /**
107      * Constructor of {@link MlKitAnalyzer}.
108      *
109      * <p>The list of detectors will be invoked sequentially in order.
110      *
111      * <p>When the targetCoordinateSystem is {@link ImageAnalysis#COORDINATE_SYSTEM_ORIGINAL}, the
112      * output coordinate system is defined by ML Kit, which is the buffer with rotation applied. For
113      * example, if {@link ImageProxy#getHeight()} is {@code h} and the rotation is 90°, (0, 0) in
114      * the result maps to the pixel (0, h) in the original buffer.
115      *
116      * <p>The constructor throws {@link IllegalArgumentException} if
117      * {@code Detector#getDetectorType()} is TYPE_SEGMENTATION and {@code targetCoordinateSystem}
118      * is COORDINATE_SYSTEM_ORIGINAL. Currently ML Kit does not support transformation with
119      * segmentation.
120      *
121      * @param detectors              list of ML Kit {@link Detector}.
122      * @param targetCoordinateSystem e.g. {@link ImageAnalysis#COORDINATE_SYSTEM_ORIGINAL}
123      *                               the coordinates in ML Kit output will be based on this value.
124      * @param executor               on which the consumer is invoked.
125      * @param consumer               invoked when there is a new ML Kit result.
126      */
127     @OptIn(markerClass = TransformExperimental.class)
MlKitAnalyzer( @onNull List<Detector<?>> detectors, int targetCoordinateSystem, @NonNull Executor executor, @NonNull Consumer<Result> consumer)128     public MlKitAnalyzer(
129             @NonNull List<Detector<?>> detectors,
130             int targetCoordinateSystem,
131             @NonNull Executor executor,
132             @NonNull Consumer<Result> consumer) {
133         if (targetCoordinateSystem != COORDINATE_SYSTEM_ORIGINAL) {
134             for (Detector<?> detector : detectors) {
135                 checkArgument(detector.getDetectorType() != TYPE_SEGMENTATION,
136                         "Segmentation only works with COORDINATE_SYSTEM_ORIGINAL");
137             }
138         }
139         // Make an immutable copy of the app provided detectors.
140         mDetectors = new ArrayList<>(detectors);
141         mTargetCoordinateSystem = targetCoordinateSystem;
142         mConsumer = consumer;
143         mExecutor = executor;
144         mImageAnalysisTransformFactory = new ImageProxyTransformFactory();
145         mImageAnalysisTransformFactory.setUsingRotationDegrees(true);
146     }
147 
148     /**
149      * Analyzes the image with the ML Kit {@code Detector}s.
150      *
151      * <p>This method forwards the image and the transformation {@link Matrix} to the {@code
152      * Detector}s. The {@code Matrix} is calculated based on the target coordinate system set in
153      * the constructor.
154      *
155      * <p>Usually this method is invoked by {@link ImageAnalysis} when a new frame is available.
156      *
157      * @see ImageAnalysis.Analyzer#analyze
158      */
159     @Override
160     @OptIn(markerClass = TransformExperimental.class)
analyze(@onNull ImageProxy imageProxy)161     public final void analyze(@NonNull ImageProxy imageProxy) {
162         // By default, the matrix is identity for COORDINATE_SYSTEM_ORIGINAL.
163         Matrix analysisToTarget = new Matrix();
164         if (mTargetCoordinateSystem != COORDINATE_SYSTEM_ORIGINAL) {
165             // Calculate the transform if not COORDINATE_SYSTEM_ORIGINAL.
166             Matrix sensorToTarget = mSensorToTarget;
167             if (mTargetCoordinateSystem != COORDINATE_SYSTEM_SENSOR && sensorToTarget == null) {
168                 // If the app sets an sensor to target transformation, we cannot provide correct
169                 // coordinates until it is ready. Return early.
170                 Logger.d(TAG, "Sensor-to-target transformation is null.");
171                 imageProxy.close();
172                 return;
173             }
174             Matrix sensorToAnalysis =
175                     new Matrix(imageProxy.getImageInfo().getSensorToBufferTransformMatrix());
176             // Calculate the rotation added by ML Kit.
177             RectF sourceRect = new RectF(0, 0, imageProxy.getWidth(),
178                     imageProxy.getHeight());
179             RectF bufferRect = rotateRect(sourceRect,
180                     imageProxy.getImageInfo().getRotationDegrees());
181             Matrix analysisToMlKitRotation = getRectToRect(sourceRect, bufferRect,
182                     imageProxy.getImageInfo().getRotationDegrees());
183             // Concat the MLKit transformation with sensor to Analysis.
184             sensorToAnalysis.postConcat(analysisToMlKitRotation);
185             // Invert to get analysis to sensor.
186             sensorToAnalysis.invert(analysisToTarget);
187             if (mTargetCoordinateSystem != COORDINATE_SYSTEM_SENSOR) {
188                 // Concat the sensor to target transformation to get the overall transformation.
189                 analysisToTarget.postConcat(sensorToTarget);
190             }
191         }
192         // Detect the image recursively, starting from index 0.
193         detectRecursively(imageProxy, 0, analysisToTarget, new HashMap<>(), new HashMap<>());
194     }
195 
196     /**
197      * Recursively processes the image with {@link #mDetectors}.
198      *
199      * @param detectorIndex the current index of {@link #mDetectors} being processed.
200      * @param values        values returned from the {@link #mDetectors}.
201      * @param throwables    exceptions returned from the {@link #mDetectors}.
202      */
203     @OptIn(markerClass = ExperimentalGetImage.class)
detectRecursively( @onNull ImageProxy imageProxy, int detectorIndex, @NonNull Matrix transform, Map<Detector<?>, Object> values, @NonNull Map<Detector<?>, Throwable> throwables)204     private void detectRecursively(
205             @NonNull ImageProxy imageProxy,
206             int detectorIndex,
207             @NonNull Matrix transform,
208             Map<Detector<?>, Object> values,
209             @NonNull Map<Detector<?>, Throwable> throwables) {
210         Image image = imageProxy.getImage();
211         if (image == null) {
212             // No-op if the frame is not backed by ImageProxy.
213             Logger.e(TAG, "Image is null.");
214             imageProxy.close();
215             return;
216         }
217 
218         if (detectorIndex > mDetectors.size() - 1) {
219             // Termination condition is met when the index reaches the end of the list.
220             imageProxy.close();
221             mExecutor.execute(() -> mConsumer.accept(
222                     new Result(values, imageProxy.getImageInfo().getTimestamp(), throwables)));
223             return;
224         }
225         Detector<?> detector = mDetectors.get(detectorIndex);
226         int rotationDegrees = imageProxy.getImageInfo().getRotationDegrees();
227 
228         Task<?> mlKitTask;
229         try {
230             mlKitTask = detector.process(image, rotationDegrees, transform);
231         } catch (Exception e) {
232             // If the detector is closed, it will throw a MlKitException.UNAVAILABLE. It's not
233             // public in the "mlkit:vision-interfaces" artifact so we have to catch a generic
234             // Exception here.
235             throwables.put(detector, new RuntimeException("Failed to process the image.", e));
236             // This detector is closed, but the next one might still be open. Send the image to
237             // the next detector.
238             detectRecursively(imageProxy, detectorIndex + 1, transform, values,
239                     throwables);
240             return;
241         }
242         mlKitTask.addOnCompleteListener(
243                 mExecutor,
244                 task -> {
245                     // Record the return value / exception.
246                     if (task.isCanceled()) {
247                         throwables.put(detector,
248                                 new CancellationException("The task is canceled."));
249                     } else if (task.isSuccessful()) {
250                         values.put(detector, task.getResult());
251                     } else {
252                         throwables.put(detector, task.getException());
253                     }
254                     // Go to the next detector.
255                     detectRecursively(imageProxy, detectorIndex + 1, transform, values,
256                             throwables);
257                 });
258     }
259 
260     /**
261      * {@inheritDoc}
262      */
263     @Override
getDefaultTargetResolution()264     public final @NonNull Size getDefaultTargetResolution() {
265         Size size = DEFAULT_SIZE;
266         for (Detector<?> detector : mDetectors) {
267             Size detectorSize = getTargetResolution(detector.getDetectorType());
268             if (detectorSize.getHeight() * detectorSize.getWidth()
269                     > size.getWidth() * size.getHeight()) {
270                 size = detectorSize;
271             }
272         }
273         return size;
274     }
275 
276     /**
277      * Gets the recommended resolution for the given {@code Detector} type.
278      *
279      * <p> The resolution can be found on ML Kit's DAC page.
280      */
getTargetResolution(int detectorType)281     private @NonNull Size getTargetResolution(int detectorType) {
282         switch (detectorType) {
283             case TYPE_BARCODE_SCANNING:
284             case TYPE_TEXT_RECOGNITION:
285                 return new Size(1280, 720);
286             default:
287                 return DEFAULT_SIZE;
288         }
289     }
290 
291     /**
292      * {@inheritDoc}
293      */
294     @Override
getTargetCoordinateSystem()295     public final int getTargetCoordinateSystem() {
296         return mTargetCoordinateSystem;
297     }
298 
299     /**
300      * {@inheritDoc}
301      */
302     @Override
updateTransform(@ullable Matrix matrix)303     public final void updateTransform(@Nullable Matrix matrix) {
304         if (matrix == null) {
305             mSensorToTarget = null;
306         } else {
307             mSensorToTarget = new Matrix(matrix);
308         }
309     }
310 
311     /**
312      * The aggregated MLKit result of a camera frame.
313      */
314     public static final class Result {
315 
316         private final @NonNull Map<Detector<?>, Object> mValues;
317         private final @NonNull Map<Detector<?>, Throwable> mThrowables;
318         private final long mTimestamp;
319 
Result(@onNull Map<Detector<?>, Object> values, long timestamp, @NonNull Map<Detector<?>, Throwable> throwables)320         public Result(@NonNull Map<Detector<?>, Object> values, long timestamp,
321                 @NonNull Map<Detector<?>, Throwable> throwables) {
322             mValues = values;
323             mThrowables = throwables;
324             mTimestamp = timestamp;
325         }
326 
327         /**
328          * Get the analysis result for the given ML Kit {@code Detector}.
329          *
330          * <p>Returns {@code null} if the detection is unsuccessful.
331          *
332          * <p>This method and {@link #getThrowable} may both return {@code null}. For example,
333          * when a face detector processes a frame successfully and does not detect any faces.
334          * However, if {@link #getThrowable} returns a non-null {@link Throwable}, then this
335          * method will always return {@code null}.
336          *
337          * @param detector has to be one of the {@code Detector}s provided in
338          *                 {@link MlKitAnalyzer}'s constructor.
339          */
340         @SuppressWarnings("unchecked")
getValue(@onNull Detector<T> detector)341         public <T> @Nullable T getValue(@NonNull Detector<T> detector) {
342             checkDetectorExists(detector);
343             return (T) mValues.get(detector);
344         }
345 
346         /**
347          * The error returned from the given {@code Detector}.
348          *
349          * <p>Returns {@code null} if the {@code Detector} finishes without exceptions.
350          *
351          * @param detector has to be one of the {@code Detector}s provided in
352          *                 {@link MlKitAnalyzer}'s constructor.
353          */
getThrowable(@onNull Detector<?> detector)354         public @Nullable Throwable getThrowable(@NonNull Detector<?> detector) {
355             checkDetectorExists(detector);
356             return mThrowables.get(detector);
357         }
358 
359         /**
360          * The timestamp of the camera frame.
361          *
362          * <p> The timestamp of the camera frame based on which the analysis result is produced.
363          * This is the value of {@link ImageProxy#getImageInfo()#getTimestamp()}.
364          */
getTimestamp()365         public long getTimestamp() {
366             return mTimestamp;
367         }
368 
checkDetectorExists(@onNull Detector<?> detector)369         private void checkDetectorExists(@NonNull Detector<?> detector) {
370             checkArgument(mValues.containsKey(detector) || mThrowables.containsKey(detector),
371                     "The detector does not exist");
372         }
373     }
374 }
375