/*
 * Copyright 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package androidx.camera.mlkit.vision;

import static androidx.camera.core.ImageAnalysis.COORDINATE_SYSTEM_ORIGINAL;
import static androidx.camera.core.ImageAnalysis.COORDINATE_SYSTEM_SENSOR;
import static androidx.camera.core.impl.utils.TransformUtils.getRectToRect;
import static androidx.camera.core.impl.utils.TransformUtils.rotateRect;

import static com.google.android.gms.common.internal.Preconditions.checkArgument;
import static com.google.mlkit.vision.interfaces.Detector.TYPE_BARCODE_SCANNING;
import static com.google.mlkit.vision.interfaces.Detector.TYPE_SEGMENTATION;
import static com.google.mlkit.vision.interfaces.Detector.TYPE_TEXT_RECOGNITION;

import android.graphics.Matrix;
import android.graphics.RectF;
import android.media.Image;
import android.util.Size;

import androidx.annotation.OptIn;
import androidx.camera.core.ExperimentalGetImage;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageProxy;
import androidx.camera.core.Logger;
import androidx.camera.view.TransformExperimental;
import androidx.camera.view.transform.ImageProxyTransformFactory;
import androidx.core.util.Consumer;

import com.google.android.gms.tasks.Task;
import com.google.mlkit.vision.interfaces.Detector;

import org.jspecify.annotations.NonNull;
import org.jspecify.annotations.Nullable;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.concurrent.Executor;

/**
 * An implementation of {@link ImageAnalysis.Analyzer} with ML Kit libraries.
 *
 * <p> This class is a wrapper of one or many ML Kit {@code Detector}s. It forwards
 * {@link ImageAnalysis} frames to all the {@code Detector}s sequentially. Once all the
 * {@code Detector}s finish analyzing the frame, {@link Consumer#accept} will be
 * invoked with the aggregated analysis results.
 *
 * <p> This class handles the coordinate transformation between ML Kit output and the target
 * coordinate system. Using the {@code targetCoordinateSystem} set in the constructor, it
 * calculates the {@link Matrix} with the value provided by CameraX via
 * {@link ImageAnalysis.Analyzer#updateTransform} and forwards it to the ML Kit {@code Detector}.
 * The coordinates returned by MLKit will be in the specified coordinate system.
 *
 * <p> This class is designed to work seamlessly with the {@code CameraController} class in
 * camera-view. When used with {@link ImageAnalysis} in camera-core, the following scenarios may
 * need special handling:
 * <ul>
 * <li> Cannot transform coordinates to UI coordinate system. e.g. camera-core only supports
 * {@link ImageAnalysis#COORDINATE_SYSTEM_ORIGINAL}.
 * <li>For the value of {@link #getDefaultTargetResolution()} to be effective, make sure
 * the {@link ImageAnalysis#setAnalyzer} is called before it's bound to the lifecycle.
 * </ul>
 *
 * Code sample:
 * <pre><code>
 *  cameraController.setImageAnalysisAnalyzer(executor,
 *       new MlKitAnalyzer(List.of(barcodeScanner), COORDINATE_SYSTEM_VIEW_REFERENCED,
 *       executor, result -> {
 *          // The value of result.getValue(barcodeScanner) can be used directly for
 *          // drawing UI overlay.
 *       }));
 * </code></pre>
 *
 * @see ImageAnalysis.Analyzer
 */
public class MlKitAnalyzer implements ImageAnalysis.Analyzer {

    private static final String TAG = "MlKitAnalyzer";

    // Fallback resolution; also the floor used by getDefaultTargetResolution().
    private static final Size DEFAULT_SIZE = new Size(480, 360);

    private final @NonNull List<Detector<?>> mDetectors;
    private final int mTargetCoordinateSystem;
    // Synthetic access
    final @NonNull Consumer<Result> mConsumer;
    // Synthetic access
    final ImageProxyTransformFactory mImageAnalysisTransformFactory;
    private final @NonNull Executor mExecutor;

    // Latest sensor-to-target matrix received via updateTransform(); null until one is provided.
    private @Nullable Matrix mSensorToTarget;

    /**
     * Constructor of {@link MlKitAnalyzer}.
     *
     * <p>The list of detectors will be invoked sequentially in order.
     *
     * <p>When the targetCoordinateSystem is {@link ImageAnalysis#COORDINATE_SYSTEM_ORIGINAL}, the
     * output coordinate system is defined by ML Kit, which is the buffer with rotation applied. For
     * example, if {@link ImageProxy#getHeight()} is {@code h} and the rotation is 90°, (0, 0) in
     * the result maps to the pixel (0, h) in the original buffer.
     *
     * <p>The constructor throws {@link IllegalArgumentException} if
     * {@code Detector#getDetectorType()} is TYPE_SEGMENTATION and {@code targetCoordinateSystem}
     * is not COORDINATE_SYSTEM_ORIGINAL. Currently ML Kit does not support transformation with
     * segmentation.
     *
     * @param detectors              list of ML Kit {@link Detector}.
     * @param targetCoordinateSystem e.g. {@link ImageAnalysis#COORDINATE_SYSTEM_ORIGINAL}
     *                               the coordinates in ML Kit output will be based on this value.
     * @param executor               on which the consumer is invoked.
     * @param consumer               invoked when there is a new ML Kit result.
     * @throws IllegalArgumentException if a segmentation detector is combined with a target
     *                                  coordinate system other than COORDINATE_SYSTEM_ORIGINAL.
     */
    @OptIn(markerClass = TransformExperimental.class)
    public MlKitAnalyzer(
            @NonNull List<Detector<?>> detectors,
            int targetCoordinateSystem,
            @NonNull Executor executor,
            @NonNull Consumer<Result> consumer) {
        if (targetCoordinateSystem != COORDINATE_SYSTEM_ORIGINAL) {
            for (Detector<?> detector : detectors) {
                checkArgument(detector.getDetectorType() != TYPE_SEGMENTATION,
                        "Segmentation only works with COORDINATE_SYSTEM_ORIGINAL");
            }
        }
        // Make a defensive copy of the app provided detectors so that later changes to the
        // caller's list do not affect this analyzer.
        mDetectors = new ArrayList<>(detectors);
        mTargetCoordinateSystem = targetCoordinateSystem;
        mConsumer = consumer;
        mExecutor = executor;
        mImageAnalysisTransformFactory = new ImageProxyTransformFactory();
        mImageAnalysisTransformFactory.setUsingRotationDegrees(true);
    }

    /**
     * Analyzes the image with the ML Kit {@code Detector}s.
     *
     * <p>This method forwards the image and the transformation {@link Matrix} to the {@code
     * Detector}s. The {@code Matrix} is calculated based on the target coordinate system set in
     * the constructor.
     *
     * <p>If the target coordinate system is neither
     * {@link ImageAnalysis#COORDINATE_SYSTEM_ORIGINAL} nor
     * {@link ImageAnalysis#COORDINATE_SYSTEM_SENSOR} and no transform has been provided via
     * {@link #updateTransform} yet, the frame is closed and dropped without being analyzed.
     *
     * <p>Usually this method is invoked by {@link ImageAnalysis} when a new frame is available.
     *
     * @see ImageAnalysis.Analyzer#analyze
     */
    @Override
    @OptIn(markerClass = TransformExperimental.class)
    public final void analyze(@NonNull ImageProxy imageProxy) {
        // By default, the matrix is identity for COORDINATE_SYSTEM_ORIGINAL.
        Matrix analysisToTarget = new Matrix();
        if (mTargetCoordinateSystem != COORDINATE_SYSTEM_ORIGINAL) {
            // Calculate the transform if not COORDINATE_SYSTEM_ORIGINAL.
            // Read the field once so the null check and the later use see the same value.
            Matrix sensorToTarget = mSensorToTarget;
            if (mTargetCoordinateSystem != COORDINATE_SYSTEM_SENSOR && sensorToTarget == null) {
                // If the app sets a sensor to target transformation, we cannot provide correct
                // coordinates until it is ready. Return early.
                Logger.d(TAG, "Sensor-to-target transformation is null.");
                imageProxy.close();
                return;
            }
            int rotationDegrees = imageProxy.getImageInfo().getRotationDegrees();
            Matrix sensorToAnalysis =
                    new Matrix(imageProxy.getImageInfo().getSensorToBufferTransformMatrix());
            // Calculate the rotation added by ML Kit.
            RectF sourceRect = new RectF(0, 0, imageProxy.getWidth(),
                    imageProxy.getHeight());
            RectF bufferRect = rotateRect(sourceRect, rotationDegrees);
            Matrix analysisToMlKitRotation = getRectToRect(sourceRect, bufferRect,
                    rotationDegrees);
            // Concat the MLKit transformation with sensor to Analysis.
            sensorToAnalysis.postConcat(analysisToMlKitRotation);
            // Invert to get analysis to sensor.
            sensorToAnalysis.invert(analysisToTarget);
            if (mTargetCoordinateSystem != COORDINATE_SYSTEM_SENSOR) {
                // Concat the sensor to target transformation to get the overall transformation.
                analysisToTarget.postConcat(sensorToTarget);
            }
        }
        // Detect the image recursively, starting from index 0.
        detectRecursively(imageProxy, 0, analysisToTarget, new HashMap<>(), new HashMap<>());
    }

    /**
     * Recursively processes the image with {@link #mDetectors}.
     *
     * <p>Each detector is run after the previous one has completed. Once every detector has
     * been visited, the image is closed and the aggregated {@link Result} is delivered to the
     * consumer on {@link #mExecutor}.
     *
     * @param imageProxy    the frame being analyzed; closed by this method when processing ends.
     * @param detectorIndex the current index of {@link #mDetectors} being processed.
     * @param transform     the matrix forwarded to each detector together with the image.
     * @param values        values returned from the {@link #mDetectors}.
     * @param throwables    exceptions returned from the {@link #mDetectors}.
     */
    @OptIn(markerClass = ExperimentalGetImage.class)
    private void detectRecursively(
            @NonNull ImageProxy imageProxy,
            int detectorIndex,
            @NonNull Matrix transform,
            @NonNull Map<Detector<?>, Object> values,
            @NonNull Map<Detector<?>, Throwable> throwables) {
        Image image = imageProxy.getImage();
        if (image == null) {
            // No-op if the frame is not backed by an android.media.Image.
            Logger.e(TAG, "Image is null.");
            imageProxy.close();
            return;
        }

        if (detectorIndex > mDetectors.size() - 1) {
            // Termination condition is met when the index reaches the end of the list.
            // Read the timestamp before closing: the consumer runs later on mExecutor and must
            // not touch the ImageProxy once it has been closed.
            long timestamp = imageProxy.getImageInfo().getTimestamp();
            imageProxy.close();
            mExecutor.execute(() -> mConsumer.accept(new Result(values, timestamp, throwables)));
            return;
        }
        Detector<?> detector = mDetectors.get(detectorIndex);
        int rotationDegrees = imageProxy.getImageInfo().getRotationDegrees();

        Task<?> mlKitTask;
        try {
            mlKitTask = detector.process(image, rotationDegrees, transform);
        } catch (Exception e) {
            // If the detector is closed, it will throw a MlKitException.UNAVAILABLE. It's not
            // public in the "mlkit:vision-interfaces" artifact so we have to catch a generic
            // Exception here.
            throwables.put(detector, new RuntimeException("Failed to process the image.", e));
            // This detector is closed, but the next one might still be open. Send the image to
            // the next detector.
            detectRecursively(imageProxy, detectorIndex + 1, transform, values,
                    throwables);
            return;
        }
        mlKitTask.addOnCompleteListener(
                mExecutor,
                task -> {
                    // Record the return value / exception.
                    if (task.isCanceled()) {
                        throwables.put(detector,
                                new CancellationException("The task is canceled."));
                    } else if (task.isSuccessful()) {
                        values.put(detector, task.getResult());
                    } else {
                        throwables.put(detector, task.getException());
                    }
                    // Go to the next detector.
                    detectRecursively(imageProxy, detectorIndex + 1, transform, values,
                            throwables);
                });
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public final @NonNull Size getDefaultTargetResolution() {
        // Pick the largest (by pixel count) recommended size across all detectors, never going
        // below DEFAULT_SIZE.
        Size size = DEFAULT_SIZE;
        for (Detector<?> detector : mDetectors) {
            Size detectorSize = getTargetResolution(detector.getDetectorType());
            if (detectorSize.getHeight() * detectorSize.getWidth()
                    > size.getWidth() * size.getHeight()) {
                size = detectorSize;
            }
        }
        return size;
    }

    /**
     * Gets the recommended resolution for the given {@code Detector} type.
     *
     * <p> The resolution can be found on ML Kit's DAC page.
     */
    private @NonNull Size getTargetResolution(int detectorType) {
        switch (detectorType) {
            case TYPE_BARCODE_SCANNING:
            case TYPE_TEXT_RECOGNITION:
                return new Size(1280, 720);
            default:
                return DEFAULT_SIZE;
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public final int getTargetCoordinateSystem() {
        return mTargetCoordinateSystem;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public final void updateTransform(@Nullable Matrix matrix) {
        if (matrix == null) {
            mSensorToTarget = null;
        } else {
            // Copy so that later mutations of the caller's matrix do not leak into this analyzer.
            mSensorToTarget = new Matrix(matrix);
        }
    }

    /**
     * The aggregated MLKit result of a camera frame.
     */
    public static final class Result {

        private final @NonNull Map<Detector<?>, Object> mValues;
        private final @NonNull Map<Detector<?>, Throwable> mThrowables;
        private final long mTimestamp;

        /**
         * Creates a result from the per-detector values and errors of a single frame.
         *
         * <p>The maps are stored as-is, without copying.
         *
         * @param values     values returned by the detectors, keyed by detector.
         * @param timestamp  the timestamp of the analyzed camera frame.
         * @param throwables errors returned by the detectors, keyed by detector.
         */
        public Result(@NonNull Map<Detector<?>, Object> values, long timestamp,
                @NonNull Map<Detector<?>, Throwable> throwables) {
            mValues = values;
            mThrowables = throwables;
            mTimestamp = timestamp;
        }

        /**
         * Get the analysis result for the given ML Kit {@code Detector}.
         *
         * <p>Returns {@code null} if the detection is unsuccessful.
         *
         * <p>This method and {@link #getThrowable} may both return {@code null}. For example,
         * when a face detector processes a frame successfully and does not detect any faces.
         * However, if {@link #getThrowable} returns a non-null {@link Throwable}, then this
         * method will always return {@code null}.
         *
         * @param detector has to be one of the {@code Detector}s provided in
         *                 {@link MlKitAnalyzer}'s constructor.
         * @throws IllegalArgumentException if this result holds neither a value nor an error
         *                                  for the given detector.
         */
        @SuppressWarnings("unchecked")
        public <T> @Nullable T getValue(@NonNull Detector<T> detector) {
            checkDetectorExists(detector);
            return (T) mValues.get(detector);
        }

        /**
         * The error returned from the given {@code Detector}.
         *
         * <p>Returns {@code null} if the {@code Detector} finishes without exceptions.
         *
         * @param detector has to be one of the {@code Detector}s provided in
         *                 {@link MlKitAnalyzer}'s constructor.
         * @throws IllegalArgumentException if this result holds neither a value nor an error
         *                                  for the given detector.
         */
        public @Nullable Throwable getThrowable(@NonNull Detector<?> detector) {
            checkDetectorExists(detector);
            return mThrowables.get(detector);
        }

        /**
         * The timestamp of the camera frame.
         *
         * <p> The timestamp of the camera frame based on which the analysis result is produced.
         * This is the value of {@code ImageProxy#getImageInfo().getTimestamp()}.
         */
        public long getTimestamp() {
            return mTimestamp;
        }

        // Rejects detectors for which this result recorded neither a value nor an error.
        private void checkDetectorExists(@NonNull Detector<?> detector) {
            checkArgument(mValues.containsKey(detector) || mThrowables.containsKey(detector),
                    "The detector does not exist");
        }
    }
}