# Copyright 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image processing utility functions."""


import copy
import io
import logging
import math
import os
import random
import sys
import unittest

import capture_request_utils
import cv2
import error_util
import numpy
from PIL import Image


# The matrix is from JFIF spec
DEFAULT_YUV_TO_RGB_CCM = numpy.matrix([[1.000, 0.000, 1.402],
                                       [1.000, -0.344, -0.714],
                                       [1.000, 1.772, 0.000]])

DEFAULT_YUV_OFFSETS = numpy.array([0, 128, 128])
MAX_LUT_SIZE = 65536
DEFAULT_GAMMA_LUT = numpy.array([
    math.floor((MAX_LUT_SIZE-1) * math.pow(i/(MAX_LUT_SIZE-1), 1/2.2) + 0.5)
    for i in range(MAX_LUT_SIZE)])
NUM_TRIES = 2
NUM_FRAMES = 4
TEST_IMG_DIR = os.path.join(os.environ['CAMERA_ITS_TOP'], 'test_images')


def assert_props_is_not_none(props):
  """Raise AssertionError if props is falsy (None or empty).

  Args:
    props: camera properties object.

  Raises:
    AssertionError: if props evaluates to False.
  """
  if not props:
    raise AssertionError('props is None')


def convert_capture_to_rgb_image(cap,
                                 props=None,
                                 apply_ccm_raw_to_rgb=True):
  """Convert a captured image object to a RGB image.

  Args:
    cap: A capture object as returned by its_session_utils.do_capture.
    props: (Optional) camera properties object (of static values);
      required for processing raw images.
    apply_ccm_raw_to_rgb: (Optional) boolean to apply color correction matrix.

  Returns:
    RGB float-3 image array, with pixel values in [0.0, 1.0].

  Raises:
    AssertionError: if a raw format is given without props.
    error_util.CameraItsError: if the capture format is not supported.
  """
  w = cap['width']
  h = cap['height']
  # Packed raw formats are first expanded to 16b words; the unpack helpers
  # re-tag the capture as 'raw' so it falls through to the raw branch below.
  if cap['format'] == 'raw10':
    assert_props_is_not_none(props)
    cap = unpack_raw10_capture(cap)

  if cap['format'] == 'raw12':
    assert_props_is_not_none(props)
    cap = unpack_raw12_capture(cap)

  if cap['format'] == 'yuv':
    y = cap['data'][0: w * h]
    u = cap['data'][w * h: w * h * 5//4]
    v = cap['data'][w * h * 5//4: w * h * 6//4]
    return convert_yuv420_planar_to_rgb_image(y, u, v, w, h)
  elif cap['format'] == 'jpeg':
    return decompress_jpeg_to_rgb_image(cap['data'])
  elif cap['format'] == 'raw' or cap['format'] == 'rawStats':
    assert_props_is_not_none(props)
    r, gr, gb, b = convert_capture_to_planes(cap, props)
    return convert_raw_to_rgb_image(
        r, gr, gb, b, props, cap['metadata'], apply_ccm_raw_to_rgb)
  elif cap['format'] == 'y8':
    y = cap['data'][0: w * h]
    return convert_y8_to_rgb_image(y, w, h)
  else:
    raise error_util.CameraItsError(f"Invalid format {cap['format']}")


def unpack_raw10_capture(cap):
  """Unpack a raw-10 capture to a raw-16 capture.

  Args:
    cap: A raw-10 capture object.

  Returns:
    New capture object with raw-16 data.

  Raises:
    error_util.CameraItsError: if the capture width is not a multiple of 4.
  """
  # Data is packed as 4x10b pixels in 5 bytes, with the first 4 bytes holding
  # the MSBs of the pixels, and the 5th byte holding 4x2b LSBs.
  w, h = cap['width'], cap['height']
  if w % 4 != 0:
    raise error_util.CameraItsError('Invalid raw-10 buffer width')
  # Work on a deep copy so the caller's capture object is left untouched.
  cap = copy.deepcopy(cap)
  cap['data'] = unpack_raw10_image(cap['data'].reshape(h, w * 5 // 4))
  cap['format'] = 'raw'
  return cap


def unpack_raw10_image(img):
  """Unpack a raw-10 image to a raw-16 image.

  Output image will have the 10 LSBs filled in each 16b word, and the 6 MSBs
  will be set to zero.

  Args:
    img: A raw-10 image, as a uint8 numpy array.

  Returns:
    Image as a uint16 numpy array, with all row padding stripped.

  Raises:
    error_util.CameraItsError: if the row length is not a multiple of 5 bytes.
  """
  if img.shape[1] % 5 != 0:
    raise error_util.CameraItsError('Invalid raw-10 buffer width')
  w = img.shape[1] * 4 // 5
  h = img.shape[0]
  # Cut out the 4x8b MSBs and shift to bits [9:2] in 16b words.
  msbs = numpy.delete(img, numpy.s_[4::5], 1)
  msbs = msbs.astype(numpy.uint16)
  msbs = numpy.left_shift(msbs, 2)
  msbs = msbs.reshape(h, w)
  # Cut out the 4x2b LSBs and put each in bits [1:0] of their own 8b words.
  lsbs = img[::, 4::5].reshape(h, w // 4)
  lsbs = numpy.right_shift(
      numpy.packbits(numpy.unpackbits(lsbs).reshape((h, w // 4, 4, 2)), 3), 6)
  # Pair the LSB bits group to 0th pixel instead of 3rd pixel
  lsbs = lsbs.reshape(h, w // 4, 4)[:, :, ::-1]
  lsbs = lsbs.reshape(h, w)
  # Fuse the MSBs and LSBs back together
  img16 = numpy.bitwise_or(msbs, lsbs).reshape(h, w)
  return img16


def unpack_raw12_capture(cap):
  """Unpack a raw-12 capture to a raw-16 capture.

  Args:
    cap: A raw-12 capture object.

  Returns:
    New capture object with raw-16 data.

  Raises:
    error_util.CameraItsError: if the capture width is not a multiple of 2.
  """
  # Data is packed as 2x12b pixels in 3 bytes, with the first 2 bytes holding
  # the MSBs of the pixels, and the 3rd byte holding 2x4b LSBs.
  w, h = cap['width'], cap['height']
  if w % 2 != 0:
    raise error_util.CameraItsError('Invalid raw-12 buffer width')
  # Work on a deep copy so the caller's capture object is left untouched.
  cap = copy.deepcopy(cap)
  cap['data'] = unpack_raw12_image(cap['data'].reshape(h, w * 3 // 2))
  cap['format'] = 'raw'
  return cap


def unpack_raw12_image(img):
  """Unpack a raw-12 image to a raw-16 image.

  Output image will have the 12 LSBs filled in each 16b word, and the 4 MSBs
  will be set to zero.

  Args:
    img: A raw-12 image, as a uint8 numpy array.

  Returns:
    Image as a uint16 numpy array, with all row padding stripped.

  Raises:
    error_util.CameraItsError: if the row length is not a multiple of 3 bytes.
  """
  if img.shape[1] % 3 != 0:
    raise error_util.CameraItsError('Invalid raw-12 buffer width')
  w = img.shape[1] * 2 // 3
  h = img.shape[0]
  # Cut out the 2x8b MSBs and shift to bits [11:4] in 16b words.
  msbs = numpy.delete(img, numpy.s_[2::3], 1)
  msbs = msbs.astype(numpy.uint16)
  msbs = numpy.left_shift(msbs, 4)
  msbs = msbs.reshape(h, w)
  # Cut out the 2x4b LSBs and put each in bits [3:0] of their own 8b words.
  lsbs = img[::, 2::3].reshape(h, w // 2)
  lsbs = numpy.right_shift(
      numpy.packbits(numpy.unpackbits(lsbs).reshape((h, w // 2, 2, 4)), 3), 4)
  # Pair the LSB bits group to pixel 0 instead of pixel 1
  lsbs = lsbs.reshape(h, w // 2, 2)[:, :, ::-1]
  lsbs = lsbs.reshape(h, w)
  # Fuse the MSBs and LSBs back together
  img16 = numpy.bitwise_or(msbs, lsbs).reshape(h, w)
  return img16


def convert_yuv420_planar_to_rgb_image(y_plane, u_plane, v_plane,
                                       w, h,
                                       ccm_yuv_to_rgb=DEFAULT_YUV_TO_RGB_CCM,
                                       yuv_off=DEFAULT_YUV_OFFSETS):
  """Convert a YUV420 8-bit planar image to an RGB image.

  Args:
    y_plane: The packed 8-bit Y plane.
    u_plane: The packed 8-bit U plane.
    v_plane: The packed 8-bit V plane.
    w: The width of the image.
    h: The height of the image.
    ccm_yuv_to_rgb: (Optional) the 3x3 CCM to convert from YUV to RGB.
    yuv_off: (Optional) offsets to subtract from each of Y,U,V values.

  Returns:
    RGB float-3 image array, with pixel values in [0.0, 1.0].
  """
  y = numpy.subtract(y_plane, yuv_off[0])
  # Chroma is reinterpreted as signed 8-bit after the offset is removed.
  u = numpy.subtract(u_plane, yuv_off[1]).view(numpy.int8)
  v = numpy.subtract(v_plane, yuv_off[2]).view(numpy.int8)
  # Upsample the half-resolution chroma planes 2x in each direction.
  u = u.reshape(h // 2, w // 2).repeat(2, axis=1).repeat(2, axis=0)
  v = v.reshape(h // 2, w // 2).repeat(2, axis=1).repeat(2, axis=0)
  yuv = numpy.dstack([y, u.reshape(w * h), v.reshape(w * h)])
  flt = numpy.empty([h, w, 3], dtype=numpy.float32)
  flt.reshape(w * h * 3)[:] = yuv.reshape(h * w * 3)[:]
  flt = numpy.dot(flt.reshape(w * h, 3), ccm_yuv_to_rgb.T).clip(0, 255)
  rgb = numpy.empty([h, w, 3], dtype=numpy.uint8)
  rgb.reshape(w * h * 3)[:] = flt.reshape(w * h * 3)[:]
  return rgb.astype(numpy.float32) / 255.0


def decompress_jpeg_to_rgb_image(jpeg_buffer):
  """Decompress a JPEG-compressed image, returning as an RGB image.

  Args:
    jpeg_buffer: The JPEG stream.

  Returns:
     A numpy array for the RGB image, with pixels in [0,1].
  """
  img = Image.open(io.BytesIO(jpeg_buffer))
  w = img.size[0]
  h = img.size[1]
  # The reshape assumes the decoded image has exactly 3 channels.
  return numpy.array(img).reshape((h, w, 3)) / 255.0


def convert_image_to_numpy_array(image_path):
  """Converts image at image_path to numpy array and returns the array.

  Args:
    image_path: file path

  Returns:
    numpy array

  Raises:
    AssertionError: if image_path does not exist.
  """
  if not os.path.exists(image_path):
    raise AssertionError(f'{image_path} does not exist.')
  image = Image.open(image_path)
  return numpy.array(image)


def convert_capture_to_planes(cap, props=None):
  """Convert a captured image object to separate image planes.

  Decompose an image into multiple images, corresponding to different planes.

  For YUV420 captures ("yuv"):
    Returns Y,U,V planes, where the Y plane is full-res and the U,V planes
    are each 1/2 x 1/2 of the full res.

  For Bayer captures ("raw", "raw10", "raw12", or "rawStats"):
    Returns planes in the order R,Gr,Gb,B, regardless of the Bayer pattern
    layout. For full-res raw images ("raw", "raw10", "raw12"), each plane
    is 1/2 x 1/2 of the full res. For "rawStats" images, the mean image
    is returned.

  For JPEG captures ("jpeg"):
    Returns R,G,B full-res planes.

  Args:
    cap: A capture object as returned by its_session_utils.do_capture.
    props: (Optional) camera properties object (of static values);
      required for processing raw images.

  Returns:
    A tuple of float numpy arrays (one per plane), consisting of pixel values
    in the range [0.0, 1.0].

  Raises:
    AssertionError: if props is missing for a raw format, or the crop
      rectangle does not fit inside the pixel array.
    error_util.CameraItsError: if the format is unsupported or the raw image
      size matches neither the pixel array nor the active array.
  """
  w = cap['width']
  h = cap['height']
  if cap['format'] == 'raw10':
    assert_props_is_not_none(props)
    cap = unpack_raw10_capture(cap)
  if cap['format'] == 'raw12':
    assert_props_is_not_none(props)
    cap = unpack_raw12_capture(cap)
  if cap['format'] == 'yuv':
    y = cap['data'][0:w * h]
    u = cap['data'][w * h:w * h * 5 // 4]
    v = cap['data'][w * h * 5 // 4:w * h * 6 // 4]
    return ((y.astype(numpy.float32) / 255.0).reshape(h, w, 1),
            (u.astype(numpy.float32) / 255.0).reshape(h // 2, w // 2, 1),
            (v.astype(numpy.float32) / 255.0).reshape(h // 2, w // 2, 1))
  elif cap['format'] == 'jpeg':
    rgb = decompress_jpeg_to_rgb_image(cap['data']).reshape(w * h * 3)
    return (rgb[::3].reshape(h, w, 1), rgb[1::3].reshape(h, w, 1),
            rgb[2::3].reshape(h, w, 1))
  elif cap['format'] == 'raw':
    assert_props_is_not_none(props)
    white_level = float(props['android.sensor.info.whiteLevel'])
    img = numpy.ndarray(
        shape=(h * w,), dtype='<u2', buffer=cap['data'][0:w * h * 2])
    img = img.astype(numpy.float32).reshape(h, w) / white_level
    # Crop the raw image to the active array region.
    active_array = props.get(
        'android.sensor.info.preCorrectionActiveArraySize')
    pixel_array = props.get('android.sensor.info.pixelArraySize')
    if active_array is not None and pixel_array is not None:
      # Note that the Rect class is defined such that the left,top values
      # are "inside" while the right,bottom values are "outside"; that is,
      # it's inclusive of the top,left sides only. So, the width is
      # computed as right-left, rather than right-left+1, etc.
      wfull = pixel_array['width']
      hfull = pixel_array['height']
      xcrop = active_array['left']
      ycrop = active_array['top']
      wcrop = active_array['right'] - xcrop
      hcrop = active_array['bottom'] - ycrop
      if not wfull >= wcrop >= 0:
        raise AssertionError(f'wcrop: {wcrop} not in wfull: {wfull}')
      if not hfull >= hcrop >= 0:
        raise AssertionError(f'hcrop: {hcrop} not in hfull: {hfull}')
      if not wfull - wcrop >= xcrop >= 0:
        raise AssertionError(f'xcrop: {xcrop} not in wfull-crop: {wfull-wcrop}')
      if not hfull - hcrop >= ycrop >= 0:
        raise AssertionError(f'ycrop: {ycrop} not in hfull-crop: {hfull-hcrop}')
      if w == wfull and h == hfull:
        # Crop needed; extract the center region.
        img = img[ycrop:ycrop + hcrop, xcrop:xcrop + wcrop]
        w = wcrop
        h = hcrop
      elif w == wcrop and h == hcrop:
        logging.debug('Image is already cropped.No cropping needed.')
      else:
        raise error_util.CameraItsError('Invalid image size metadata')
    # Separate the image planes: even/odd rows crossed with even/odd columns
    # give the four positions of the 2x2 Bayer cell in row-major order.
    imgs = [
        img[::2].reshape(w * h // 2)[::2].reshape(h // 2, w // 2, 1),
        img[::2].reshape(w * h // 2)[1::2].reshape(h // 2, w // 2, 1),
        img[1::2].reshape(w * h // 2)[::2].reshape(h // 2, w // 2, 1),
        img[1::2].reshape(w * h // 2)[1::2].reshape(h // 2, w // 2, 1)
    ]
    idxs = get_canonical_cfa_order(props)
    return [imgs[i] for i in idxs]
  elif cap['format'] == 'rawStats':
    assert_props_is_not_none(props)
    white_level = float(props['android.sensor.info.whiteLevel'])
    # Only the mean image is needed here; the variance image is discarded.
    mean_image, _ = unpack_rawstats_capture(cap)
    idxs = get_canonical_cfa_order(props)
    return [mean_image[:, :, i] / white_level for i in idxs]
  else:
    raise error_util.CameraItsError(f"Invalid format {cap['format']}")


def downscale_image(img, f):
  """Shrink an image by a given integer factor.

  This function computes output pixel values by averaging over rectangular
  regions of the input image; it doesn't skip or sample pixels, and all input
  image pixels are evenly weighted.

  If the downscaling factor doesn't cleanly divide the width and/or height,
  then the remaining pixels on the right or bottom edge are discarded prior
  to the downscaling.

  Args:
    img: The input image as an ndarray.
    f: The downscaling factor, which should be an integer.

  Returns:
    The new (downscaled) image, as an ndarray.

  Raises:
    AssertionError: if the factor is smaller than 1.
  """
  h, w, chans = img.shape
  f = int(f)
  # Explicit raise (not a bare assert) so the check survives python -O.
  if f < 1:
    raise AssertionError(f'Downscaling factor must be >= 1, got {f}')
  h = (h//f)*f
  w = (w//f)*f
  img = img[:h, :w, :]
  chs = []
  for i in range(chans):
    ch = img.reshape(h*w*chans)[i::chans].reshape(h, w)
    # Average horizontally over groups of f columns, then vertically.
    ch = ch.reshape(h, w//f, f).mean(2).reshape(h, w//f)
    ch = ch.T.reshape(w//f, h//f, f).mean(2).T.reshape(h//f, w//f)
    chs.append(ch.reshape(h*w//(f*f)))
  img = numpy.vstack(chs).T.reshape(h//f, w//f, chans)
  return img


def convert_raw_to_rgb_image(r_plane, gr_plane, gb_plane, b_plane, props,
                             cap_res, apply_ccm_raw_to_rgb=True):
  """Convert a Bayer raw-16 image to an RGB image.

  Includes some extremely rudimentary demosaicking and color processing
  operations; the output of this function shouldn't be used for any image
  quality analysis.

  Args:
    r_plane:
    gr_plane:
    gb_plane:
    b_plane: Numpy arrays for each color plane
      in the Bayer image, with pixels in the [0.0, 1.0] range.
    props: Camera properties object.
    cap_res: Capture result (metadata) object.
    apply_ccm_raw_to_rgb: (Optional) boolean to apply color correction matrix.

  Returns:
    RGB float-3 image array, with pixel values in [0.0, 1.0]
  """
  # Values required for the RAW to RGB conversion.
  assert_props_is_not_none(props)
  white_level = float(props['android.sensor.info.whiteLevel'])
  gains = cap_res['android.colorCorrection.gains']
  ccm = cap_res['android.colorCorrection.transform']

  # Reorder black levels and gains to R,Gr,Gb,B, to match the order
  # of the planes.
  black_levels = [get_black_level(i, props, cap_res) for i in range(4)]
  gains = get_gains_in_canonical_order(props, gains)

  # Convert CCM from rational to float, as numpy arrays.
  ccm = numpy.array(capture_request_utils.rational_to_float(ccm)).reshape(3, 3)

  # Need to scale the image back to the full [0,1] range after subtracting
  # the black level from each pixel.
  scale = white_level / (white_level - max(black_levels))

  # Three-channel black levels, normalized to [0,1] by white_level.
  black_levels = numpy.array(
      [black_levels[i] / white_level for i in [0, 1, 3]])

  # Three-channel gains.
  gains = numpy.array([gains[i] for i in [0, 1, 3]])

  h, w = r_plane.shape[:2]
  # The two green planes are averaged into a single G channel.
  img = numpy.dstack([r_plane, (gr_plane + gb_plane) / 2.0, b_plane])
  img = (((img.reshape(h, w, 3) - black_levels) * scale) * gains).clip(0.0, 1.0)
  if apply_ccm_raw_to_rgb:
    img = numpy.dot(
        img.reshape(w * h, 3), ccm.T).reshape((h, w, 3)).clip(0.0, 1.0)
  return img


def convert_y8_to_rgb_image(y_plane, w, h):
  """Convert a Y 8-bit image to an RGB image.

  Args:
    y_plane: The packed 8-bit Y plane.
    w: The width of the image.
    h: The height of the image.

  Returns:
    RGB float-3 image array, with pixel values in [0.0, 1.0].
  """
  y3 = numpy.dstack([y_plane, y_plane, y_plane])
  rgb = numpy.empty([h, w, 3], dtype=numpy.uint8)
  rgb.reshape(w * h * 3)[:] = y3.reshape(w * h * 3)[:]
  return rgb.astype(numpy.float32) / 255.0


def write_image(img, fname, apply_gamma=False):
  """Save a float-3 numpy array image to a file.

  Supported formats: PNG, JPEG, and others; see PIL docs for more.

  Image can be 3-channel, which is interpreted as RGB, or can be 1-channel,
  which is greyscale.

  Can optionally specify that the image should be gamma-encoded prior to
  writing it out; this should be done if the image contains linear pixel
  values, to make the image look "normal".

  Args:
    img: Numpy image array data.
    fname: Path of file to save to; the extension specifies the format.
    apply_gamma: (Optional) apply gamma to the image prior to writing it.

  Raises:
    error_util.CameraItsError: if the image has neither 1 nor 3 channels.
  """
  if apply_gamma:
    img = apply_lut_to_image(img, DEFAULT_GAMMA_LUT)
  (h, w, chans) = img.shape
  if chans == 3:
    Image.fromarray((img * 255.0).astype(numpy.uint8), 'RGB').save(fname)
  elif chans == 1:
    # Replicate the single channel three times so it is stored as grey RGB.
    img3 = (img * 255.0).astype(numpy.uint8).repeat(3).reshape(h, w, 3)
    Image.fromarray(img3, 'RGB').save(fname)
  else:
    raise error_util.CameraItsError('Unsupported image type')


def read_image(fname):
  """Read image function to match write_image() above.

  Args:
    fname: Path of the image file to open.

  Returns:
    The object returned by PIL's Image.open for fname.
  """
  return Image.open(fname)


def apply_lut_to_image(img, lut):
  """Applies a LUT to every pixel in a float image array.

  Internally converts to a 16b integer image, since the LUT can work with up
  to 16b->16b mappings (i.e. values in the range [0,65535]). The lut can also
  have fewer than 65536 entries, however it must be sized as a power of 2
  (and for smaller luts, the scale must match the bitdepth).

  For a 16b lut of 65536 entries, the operation performed is:

    lut[r * 65535] / 65535 -> r'
    lut[g * 65535] / 65535 -> g'
    lut[b * 65535] / 65535 -> b'

  For a 10b lut of 1024 entries, the operation becomes:

    lut[r * 1023] / 1023 -> r'
    lut[g * 1023] / 1023 -> g'
    lut[b * 1023] / 1023 -> b'

  Args:
    img: Numpy float image array, with pixel values in [0,1].
    lut: Numpy table encoding a LUT, mapping 16b integer values.

  Returns:
    Float image array after applying LUT to each pixel.

  Raises:
    error_util.CameraItsError: if the LUT is empty, larger than MAX_LUT_SIZE,
      or not a power of 2 in length.
  """
  n = len(lut)
  # (n & (n - 1)) is zero only for powers of two.
  if n <= 0 or n > MAX_LUT_SIZE or (n & (n - 1)) != 0:
    raise error_util.CameraItsError(f'Invalid arg LUT size: {n}')
  m = float(n - 1)
  return (lut[(img * m).astype(numpy.uint16)] / m).astype(numpy.float32)


def get_gains_in_canonical_order(props, gains):
  """Reorders the gains tuple to the canonical R,Gr,Gb,B order.

  Args:
    props: Camera properties object.
    gains: List of 4 values, in R,G_even,G_odd,B order.

  Returns:
    List of gains values, in R,Gr,Gb,B order.

  Raises:
    error_util.CameraItsError: if the color filter arrangement is not 0..3.
  """
  cfa_pat = props['android.sensor.info.colorFilterArrangement']
  if cfa_pat in [0, 1]:
    # RGGB or GRBG, so G_even is Gr
    return gains
  elif cfa_pat in [2, 3]:
    # GBRG or BGGR, so G_even is Gb
    return [gains[0], gains[2], gains[1], gains[3]]
  else:
    raise error_util.CameraItsError('Not supported')


def get_black_level(chan, props, cap_res=None):
  """Return the black level to use for a given capture.

  Uses a dynamic value from the capture result if available, else falls back
  to the static global value in the camera characteristics.

  Args:
    chan: The channel index, in canonical order (R, Gr, Gb, B).
    props: The camera properties object.
    cap_res: A capture result object.

  Returns:
    The black level value for the specified channel.
  """
  if (cap_res is not None and
      'android.sensor.dynamicBlackLevel' in cap_res and
      cap_res['android.sensor.dynamicBlackLevel'] is not None):
    black_levels = cap_res['android.sensor.dynamicBlackLevel']
  else:
    black_levels = props['android.sensor.blackLevelPattern']
  idxs = get_canonical_cfa_order(props)
  ordered_black_levels = [black_levels[i] for i in idxs]
  return ordered_black_levels[chan]


def get_canonical_cfa_order(props):
  """Returns a mapping to the standard order R,Gr,Gb,B.

  Returns a mapping from the Bayer 2x2 top-left grid in the CFA to the standard
  order R,Gr,Gb,B.

  Args:
    props: Camera properties object.

  Returns:
    List of 4 integers, corresponding to the positions in the 2x2 top-
    left Bayer grid of R,Gr,Gb,B, where the 2x2 grid is labeled as
    0,1,2,3 in row major order.

  Raises:
    error_util.CameraItsError: if the color filter arrangement is not 0..3.
  """
  # Note that raw streams aren't croppable, so the cropRegion doesn't need
  # to be considered when determining the top-left pixel color.
  cfa_pat = props['android.sensor.info.colorFilterArrangement']
  if cfa_pat == 0:
    # RGGB
    return [0, 1, 2, 3]
  elif cfa_pat == 1:
    # GRBG
    return [1, 0, 3, 2]
  elif cfa_pat == 2:
    # GBRG
    return [2, 3, 0, 1]
  elif cfa_pat == 3:
    # BGGR
    return [3, 2, 1, 0]
  else:
    raise error_util.CameraItsError('Not supported')


def unpack_rawstats_capture(cap):
  """Unpack a rawStats capture to the mean and variance images.

  Args:
    cap: A capture object as returned by its_session_utils.do_capture.

  Returns:
    Tuple (mean_image var_image) of float-4 images, with non-normalized
    pixel values computed from the RAW16 images on the device

  Raises:
    AssertionError: if the capture format is not 'rawStats'.
  """
  if cap['format'] != 'rawStats':
    raise AssertionError(f"Unpack fmt != rawStats: {cap['format']}")
  w = cap['width']
  h = cap['height']
  # The buffer holds two consecutive h x w x 4 little-endian float images:
  # index 0 is used as the mean image and index 1 as the variance image.
  img = numpy.ndarray(shape=(2 * h * w * 4,), dtype='<f', buffer=cap['data'])
  analysis_image = img.reshape((2, h, w, 4))
  mean_image = analysis_image[0, :, :, :].reshape(h, w, 4)
  var_image = analysis_image[1, :, :, :].reshape(h, w, 4)
  return mean_image, var_image


def get_image_patch(img, xnorm, ynorm, wnorm, hnorm):
  """Get a patch (tile) of an image.

  Args:
    img: Numpy float image array, with pixel values in [0,1].
    xnorm:
    ynorm:
    wnorm:
    hnorm: Normalized (in [0,1]) coords for the tile.

  Returns:
    Numpy float image array of the patch.
  """
  hfull = img.shape[0]
  wfull = img.shape[1]
  # Origin is rounded up and size rounded down.
  xtile = int(math.ceil(xnorm * wfull))
  ytile = int(math.ceil(ynorm * hfull))
  wtile = int(math.floor(wnorm * wfull))
  htile = int(math.floor(hnorm * hfull))
  if len(img.shape) == 2:
    return img[ytile:ytile + htile, xtile:xtile + wtile].copy()
  else:
    return img[ytile:ytile + htile, xtile:xtile + wtile, :].copy()


def compute_image_means(img):
  """Calculate the mean of each color channel in the image.

  Args:
    img: Numpy float image array, with pixel values in [0,1].

  Returns:
    A list of mean values, one per color channel in the image.
  """
  chans = img.shape[2]
  return [numpy.mean(img[:, :, i], dtype=numpy.float64) for i in range(chans)]


def compute_image_variances(img):
  """Calculate the variance of each color channel in the image.

  Args:
    img: Numpy float image array, with pixel values in [0,1].

  Returns:
    A list of variance values, one per color channel in the image.
  """
  chans = img.shape[2]
  return [numpy.var(img[:, :, i], dtype=numpy.float64) for i in range(chans)]


def compute_image_sharpness(img):
  """Calculate the sharpness of input image.

  Args:
    img: numpy float RGB/luma image array, with pixel values in [0,1].

  Returns:
    Sharpness estimation value based on the average of gradient magnitude.
    Larger value means the image is sharper.

  Raises:
    AssertionError: if the image has neither 1 nor 3 channels.
  """
  chans = img.shape[2]
  if chans != 1 and chans != 3:
    raise AssertionError(f'Not RGB or MONO image! depth: {chans}')
  if chans == 1:
    luma = img[:, :, 0]
  else:
    luma = convert_rgb_to_grayscale(img)
  gy, gx = numpy.gradient(luma)
  return numpy.average(numpy.sqrt(gy*gy + gx*gx))


def compute_image_max_gradients(img):
  """Calculate the maximum gradient of each color channel in the image.

  Args:
    img: Numpy float image array, with pixel values in [0,1].

  Returns:
    A list of gradient max values, one per color channel in the image.
  """
  chans = img.shape[2]
  # numpy.gradient returns one array per axis; amax spans both of them.
  return [numpy.amax(numpy.gradient(img[:, :, i])) for i in range(chans)]


def compute_image_snrs(img):
  """Calculate the SNR (dB) of each color channel in the image.

  Args:
    img: Numpy float image array, with pixel values in [0,1].

  Returns:
    A list of SNR values in dB, one per color channel in the image.
  """
  means = compute_image_means(img)
  variances = compute_image_variances(img)
  std_devs = [math.sqrt(v) for v in variances]
  # NOTE: no guard for a zero standard deviation or a zero mean; the
  # division and log10 below are applied to the values as-is.
  snrs = [20 * math.log10(m/s) for m, s in zip(means, std_devs)]
  return snrs


def convert_rgb_to_grayscale(img):
  """Convert a 3-D array RGB image to grayscale image.

  Args:
    img: numpy float RGB/luma image array, with pixel values in [0,1].

  Returns:
    2-D grayscale image

  Raises:
    AssertionError: if the image does not have exactly 3 channels.
  """
  chans = img.shape[2]
  if chans != 3:
    raise AssertionError(f'Not an RGB image! Depth: {chans}')
  return 0.299*img[:, :, 0] + 0.587*img[:, :, 1] + 0.114*img[:, :, 2]


def normalize_img(img):
  """Normalize the image values to between 0 and 1.

  Args:
    img: 2-D numpy array of image values

  Returns:
    Normalized image
  """
  # NOTE: there is no guard for a constant image (max == min).
  return (img - numpy.amin(img))/(numpy.amax(img) - numpy.amin(img))


def rotate_img_per_argv(img):
  """Rotate an image 180 degrees if "rotate180" is in argv.

  Args:
    img: 2-D numpy array of image values

  Returns:
    Rotated image
  """
  img_out = img
  if 'rotate180' in sys.argv:
    # Flipping both axes is a 180 degree rotation.
    img_out = numpy.fliplr(numpy.flipud(img_out))
  return img_out


def stationary_lens_cap(cam, req, fmt):
  """Take up to NUM_TRIES caps and save the 1st one with lens stationary.

  Each try captures NUM_FRAMES frames; the lens is considered stationary when
  the last frame of the burst reports android.lens.state == 0.

  Args:
    cam: open device session
    req: capture request
    fmt: format for capture

  Returns:
    capture

  Raises:
    error_util.CameraItsError: if the lens has not settled after NUM_TRIES
      attempts.
  """
  tries = 0
  reqs = [req] * NUM_FRAMES
  while True:
    logging.debug('Waiting for lens to move to correct location.')
    cap = cam.do_capture(reqs, fmt)
    done = (cap[NUM_FRAMES - 1]['metadata']['android.lens.state'] == 0)
    logging.debug('status: %s', done)
    tries += 1
    # Check success before the retry limit so that a lens which settles on
    # the final attempt is accepted rather than reported as a failure.
    if done:
      return cap[NUM_FRAMES - 1]
    if tries >= NUM_TRIES:
      raise error_util.CameraItsError(
          f'Cannot settle lens after {tries} tries!')


def compute_image_rms_difference_1d(rgb_x, rgb_y):
  """Calculate the RMS difference between 2 RGB images as 1D arrays.

  Args:
    rgb_x: image array
    rgb_y: image array

  Returns:
    rms_diff

  Raises:
    AssertionError: if the two inputs have different lengths.
  """
  len_rgb_x = len(rgb_x)
  len_rgb_y = len(rgb_y)
  if len_rgb_y != len_rgb_x:
    raise AssertionError('RGB images have different number of planes! '
                         f'x: {len_rgb_x}, y: {len_rgb_y}')
  return math.sqrt(
      sum(pow(x - y, 2.0) for x, y in zip(rgb_x, rgb_y)) / len_rgb_x)


def compute_image_rms_difference_3d(rgb_x, rgb_y):
  """Calculate the RMS difference between 2 RGB images as 3D arrays.

  Args:
    rgb_x: image array in the form of w * h * channels
    rgb_y: image array in the form of w * h * channels

  Returns:
    rms_diff

  Raises:
    AssertionError: if the shapes differ or the inputs are not 3-D.
  """
  shape_rgb_x = numpy.shape(rgb_x)
  shape_rgb_y = numpy.shape(rgb_y)
  if shape_rgb_y != shape_rgb_x:
    raise AssertionError('RGB images have different number of planes! '
                         f'x: {shape_rgb_x}, y: {shape_rgb_y}')
  if len(shape_rgb_x) != 3:
    raise AssertionError(f'RGB images dimension {len(shape_rgb_x)} is not 3!')

  # Vectorized over all elements; both inputs are promoted to float64 first
  # so that unsigned integer images cannot wrap around on subtraction.
  diff = (numpy.asarray(rgb_x, dtype=numpy.float64) -
          numpy.asarray(rgb_y, dtype=numpy.float64))
  return math.sqrt(float(numpy.sum(diff * diff)) / diff.size)


class ImageProcessingUtilsTest(unittest.TestCase):
  """Unit tests for this module."""
  _SQRT_2 = numpy.sqrt(2)
  _YUV_FULL_SCALE = 1023

  def test_unpack_raw10_image(self):
    """Unit test for unpack_raw10_image.

    RAW10 bit packing format
            bit 7   bit 6   bit 5   bit 4   bit 3   bit 2   bit 1   bit 0
    Byte 0: P0[9]   P0[8]   P0[7]   P0[6]   P0[5]   P0[4]   P0[3]   P0[2]
    Byte 1: P1[9]   P1[8]   P1[7]   P1[6]   P1[5]   P1[4]   P1[3]   P1[2]
    Byte 2: P2[9]   P2[8]   P2[7]   P2[6]   P2[5]   P2[4]   P2[3]   P2[2]
    Byte 3: P3[9]   P3[8]   P3[7]   P3[6]   P3[5]   P3[4]   P3[3]   P3[2]
    Byte 4: P3[1]   P3[0]   P2[1]   P2[0]   P1[1]   P1[0]   P0[1]   P0[0]
    """
    # Test using a random 4x4 10-bit image
    img_w, img_h = 4, 4
    check_list = random.sample(range(0, 1024), img_h*img_w)
    img_check = numpy.array(check_list).reshape(img_h, img_w)

    # Pack bits: one 5-byte group per 4-pixel row.
    packed_rows = []
    for row_start in range(0, len(check_list), img_w):
      packed = []
      lsbs = ''
      for pixel in range(img_w):
        val = numpy.binary_repr(check_list[row_start+pixel], 10)
        packed.append(int(val[:8], base=2))
        # Prepend so that pixel 0's LSBs end up in the lowest two bits.
        lsbs = val[8:] + lsbs
      packed.append(int(lsbs, base=2))
      packed_rows.append(packed)
    img_raw10 = numpy.array(packed_rows, dtype='uint8')

    # Unpack and check against original
    self.assertTrue(numpy.array_equal(unpack_raw10_image(img_raw10),
                                      img_check))

  def test_compute_image_sharpness(self):
    """Unit test for compute_img_sharpness.

    Tests by using PNG of ISO12233 chart and blurring intentionally.
    'sharpness' should drop off by sqrt(2) for 2x blur of image.

    We do one level of initial blur as PNG image is not perfect.
    """
    blur_levels = [2, 4, 8]
    chart_file = os.path.join(TEST_IMG_DIR, 'ISO12233.png')
    chart = cv2.imread(chart_file, cv2.IMREAD_ANYDEPTH)
    white_level = numpy.amax(chart).astype(float)
    sharpness = {}
    for blur in blur_levels:
      chart_blurred = cv2.blur(chart, (blur, blur))
      chart_blurred = chart_blurred[:, :, numpy.newaxis]
      sharpness[blur] = self._YUV_FULL_SCALE * compute_image_sharpness(
          chart_blurred / white_level)

    for blur, next_blur in zip(blur_levels, blur_levels[1:]):
      self.assertTrue(numpy.isclose(
          sharpness[blur]/sharpness[next_blur], self._SQRT_2,
          atol=0.1))

  def test_apply_lut_to_image(self):
    """Unit test for apply_lut_to_image.

    Test by using a canned set of values on a 1x1 pixel image.
    The look-up table should double the value of the index: lut[x] = x*2
    """
    ref_image = [0.1, 0.2, 0.3]
    lut_max = 65536
    lut = numpy.array([i*2 for i in range(lut_max)])
    x = numpy.array(ref_image).reshape((1, 1, 3))
    y = apply_lut_to_image(x, lut).reshape(3).tolist()
    y_ref = [i*2 for i in ref_image]
    self.assertTrue(numpy.allclose(y, y_ref, atol=1/lut_max))


if __name__ == '__main__':
  unittest.main()