1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_SUPPORT_CC_TASK_VISION_CORE_FRAME_BUFFER_H_ 17 #define TENSORFLOW_LITE_SUPPORT_CC_TASK_VISION_CORE_FRAME_BUFFER_H_ 18 19 #include <map> 20 #include <memory> 21 #include <string> 22 #include <utility> 23 #include <vector> 24 25 #include "absl/memory/memory.h" 26 #include "absl/status/status.h" 27 #include "absl/strings/str_cat.h" 28 #include "absl/time/clock.h" 29 #include "absl/time/time.h" 30 #include "absl/types/any.h" 31 #include "absl/types/optional.h" 32 #include "tensorflow_lite_support/cc/port/integral_types.h" 33 #include "tensorflow_lite_support/cc/port/statusor.h" 34 35 namespace tflite { 36 namespace task { 37 namespace vision { 38 39 // A `FrameBuffer` provides a view into the provided backing buffer (e.g. camera 40 // frame or still image) with buffer format information. FrameBuffer doesn't 41 // take ownership of the provided backing buffer. The caller is responsible to 42 // manage the backing buffer lifecycle for the lifetime of the FrameBuffer. 43 // 44 // FrameBuffer also provides a tagging system to allow the client of FrameBuffer 45 // to attach arbitrary tags to an instance. The tagging system is meant for 46 // small set of metadata. FrameBuffer does not use the tags in anyway. The 47 // uniqueness of the tag is only guarded by the uniqueness of the key. 48 // The tag is useful when the uniqueness of a FrameBuffer can not be determined 49 // by its associated metadata. For example, there are two FrameBuffer instances 50 // with the same metadata (size dimension, orientation, format, etc) but one is 51 // generated through cropping of Frame A and another is generated by resizing of 52 // Frame A. The client can tag one of the generated FrameBuffer to distinguish 53 // the difference. 54 // 55 // Examples: 56 // 57 // // Create an metadata instance with no backing buffer. 58 // auto buffer = FrameBuffer::Create(/*planes=*/{}, dimension, kRGBA, 59 // KTopLeft); 60 // 61 // // Create an RGBA instance with backing buffer on single plane. 62 // FrameBuffer::Plane plane = {rgba_buffer, /*stride=*/{dimension.width * 4, 63 // 4}}; auto buffer = FrameBuffer::Create({plane}, dimension, kRGBA, kTopLeft); 64 // 65 // // Create an YUV instance with planar backing buffer. 66 // FrameBuffer::Plane y_plane = {y_buffer, /*stride=*/{dimension.width , 1}}; 67 // FrameBuffer::Plane uv_plane = {u_buffer, /*stride=*/{dimension.width, 2}}; 68 // auto buffer = FrameBuffer::Create({y_plane, uv_plane}, dimension, kNV21, 69 // kLeftTop); 70 // 71 // // Add / retrieve tags from a FrameBuffer instance. 72 // buffer.InsertTag("my_special_key", 1); 73 // buffer.GetTag("my_special_key"); 74 // 75 class FrameBuffer { 76 public: 77 // Colorspace formats. 78 enum class Format { kRGBA, kRGB, kNV12, kNV21, kYV12, kYV21, kGRAY }; 79 80 // Stride information. 81 struct Stride { 82 // The row stride in bytes. This is the distance between the start pixels of 83 // two consecutive rows in the image. 84 int row_stride_bytes; 85 // This is the distance between two consecutive pixel values in a row of 86 // pixels in bytes. It may be larger than the size of a single pixel to 87 // account for interleaved image data or padded formats. 88 int pixel_stride_bytes; 89 }; 90 91 // YUV data structure. 92 struct YuvData { 93 const uint8* y_buffer; 94 const uint8* u_buffer; 95 const uint8* v_buffer; 96 // Y buffer row stride in bytes. 97 int y_row_stride; 98 // U/V buffer row stride in bytes. 99 int uv_row_stride; 100 // U/V pixel stride in bytes. This is the distance between two consecutive 101 // u/v pixel values in a row. 102 int uv_pixel_stride; 103 }; 104 105 // FrameBuffer content orientation follows EXIF specification. The name of 106 // each enum value defines the position of the 0th row and the 0th column of 107 // the image content. See http://jpegclub.org/exif_orientation.html for 108 // details. 109 enum class Orientation { 110 kTopLeft = 1, 111 kTopRight = 2, 112 kBottomRight = 3, 113 kBottomLeft = 4, 114 kLeftTop = 5, 115 kRightTop = 6, 116 kRightBottom = 7, 117 kLeftBottom = 8 118 }; 119 120 // Plane encapsulates buffer and stride information. 121 struct Plane { 122 const uint8* buffer; 123 Stride stride; 124 }; 125 126 // Dimension information for the whole frame or a cropped portion of it. 127 struct Dimension { 128 // The width dimension in pixel unit. 129 int width; 130 // The height dimension in pixel unit. 131 int height; 132 133 bool operator==(const Dimension& other) const { 134 return width == other.width && height == other.height; 135 } 136 137 bool operator!=(const Dimension& other) const { 138 return width != other.width || height != other.height; 139 } 140 141 bool operator>=(const Dimension& other) const { 142 return width >= other.width && height >= other.height; 143 } 144 145 bool operator<=(const Dimension& other) const { 146 return width <= other.width && height <= other.height; 147 } 148 149 // Swaps width and height. SwapDimension150 void Swap() { 151 using std::swap; 152 swap(width, height); 153 } 154 155 // Returns area represented by width * height. SizeDimension156 int Size() const { return width * height; } 157 }; 158 159 // Factory method for creating a FrameBuffer object from row-major backing 160 // buffers. In a streaming use case (e.g continuous camera stream), the 161 // timestamp can be used as an ID to identify a frame. Create(const std::vector<Plane> & planes,Dimension dimension,Format format,Orientation orientation,absl::Time timestamp)162 static std::unique_ptr<FrameBuffer> Create(const std::vector<Plane>& planes, 163 Dimension dimension, Format format, 164 Orientation orientation, 165 absl::Time timestamp) { 166 return absl::make_unique<FrameBuffer>(planes, dimension, format, 167 orientation, timestamp); 168 } 169 170 // Factory method for creating a FrameBuffer object from row-major movable 171 // backing buffers. In a streaming use case (e.g continuous camera stream), 172 // the timestamp can be used as an ID to identify a frame. Create(std::vector<Plane> && planes,Dimension dimension,Format format,Orientation orientation,absl::Time timestamp)173 static std::unique_ptr<FrameBuffer> Create(std::vector<Plane>&& planes, 174 Dimension dimension, Format format, 175 Orientation orientation, 176 absl::Time timestamp) { 177 return absl::make_unique<FrameBuffer>(std::move(planes), dimension, format, 178 orientation, timestamp); 179 } 180 181 // Factory method for creating a FrameBuffer object from row-major backing 182 // buffers. By default this method set the timestamp to now. This method is 183 // more suitable for processing use case that does not need to re-identify 184 // this buffer. Create(const std::vector<Plane> & planes,Dimension dimension,Format format,Orientation orientation)185 static std::unique_ptr<FrameBuffer> Create(const std::vector<Plane>& planes, 186 Dimension dimension, Format format, 187 Orientation orientation) { 188 return absl::make_unique<FrameBuffer>(planes, dimension, format, 189 orientation, absl::Now()); 190 } 191 192 // Factory method for creating a FrameBuffer object from movable row-major 193 // backing buffers. By default this method set the timestamp to now. This 194 // method is more suitable for processing use case that does not need to 195 // re-identify this buffer. Create(std::vector<Plane> && planes,Dimension dimension,Format format,Orientation orientation)196 static std::unique_ptr<FrameBuffer> Create(std::vector<Plane>&& planes, 197 Dimension dimension, Format format, 198 Orientation orientation) { 199 return absl::make_unique<FrameBuffer>(std::move(planes), dimension, format, 200 orientation, absl::Now()); 201 } 202 203 // Returns YuvData which contains the Y, U, and V buffer and their 204 // stride info from the input `source` FrameBuffer which is in the YUV family 205 // formats (e.g NV12, NV21, YV12, and YV21). 206 static tflite::support::StatusOr<YuvData> GetYuvDataFromFrameBuffer( 207 const FrameBuffer& source); 208 209 // Builds a FrameBuffer object from a row-major backing buffer. 210 // 211 // The FrameBuffer does not take ownership of the backing buffer. The backing 212 // buffer is read-only and the caller is responsible for maintaining the 213 // backing buffer lifecycle for the lifetime of FrameBuffer. FrameBuffer(const std::vector<Plane> & planes,Dimension dimension,Format format,Orientation orientation,absl::Time timestamp)214 FrameBuffer(const std::vector<Plane>& planes, Dimension dimension, 215 Format format, Orientation orientation, absl::Time timestamp) 216 : planes_(planes), 217 dimension_(dimension), 218 format_(format), 219 orientation_(orientation), 220 timestamp_(timestamp) {} 221 222 // Builds a FrameBuffer object from a movable row-major backing buffer. 223 // 224 // The FrameBuffer does not take ownership of the backing buffer. The backing 225 // buffer is read-only and the caller is responsible for maintaining the 226 // backing buffer lifecycle for the lifetime of FrameBuffer. FrameBuffer(std::vector<Plane> && planes,Dimension dimension,Format format,Orientation orientation,absl::Time timestamp)227 FrameBuffer(std::vector<Plane>&& planes, Dimension dimension, Format format, 228 Orientation orientation, absl::Time timestamp) 229 : planes_(std::move(planes)), 230 dimension_(dimension), 231 format_(format), 232 orientation_(orientation), 233 timestamp_(timestamp) {} 234 235 // Returns number of planes. plane_count()236 const int plane_count() const { return planes_.size(); } 237 238 // Returns plane indexed by the input `index`. plane(int index)239 const Plane plane(int index) const { 240 if (index > -1 && index < planes_.size()) { 241 return planes_[index]; 242 } 243 return {}; 244 } 245 246 // Returns the tag associated to the tag_key. GetTag(const std::string & tag_key)247 absl::any GetTag(const std::string& tag_key) const { 248 auto iter = tags_.find(tag_key); 249 if (iter != tags_.end()) { 250 return iter->second; 251 } 252 return absl::any(); 253 } 254 255 // Inserts or updates the tags map with key value pair (tag_key, tag_value). InsertOrUpdateTag(const std::string & tag_key,absl::any tag_value)256 void InsertOrUpdateTag(const std::string& tag_key, absl::any tag_value) { 257 tags_[tag_key] = std::move(tag_value); 258 } 259 260 // Inserts the key value pair (tag_key, tag_value) into tags map. If the 261 // tag_key already exists, an internal error will return. InsertTag(const std::string & tag_key,absl::any tag_value)262 absl::Status InsertTag(const std::string& tag_key, absl::any tag_value) { 263 auto iter = tags_.emplace(tag_key, tag_value); 264 if (iter.second) { 265 return absl::OkStatus(); 266 } 267 return absl::InternalError(absl::StrCat( 268 "tag_key already exists in tags.tag_key was not inserted: ", tag_key)); 269 } 270 271 // Returns FrameBuffer dimension. dimension()272 const Dimension dimension() const { return dimension_; } 273 274 // Returns FrameBuffer format. format()275 const Format format() const { return format_; } 276 277 // Returns FrameBuffer orientation. orientation()278 const Orientation orientation() const { return orientation_; } 279 280 // Returns FrameBuffer timestamp. timestamp()281 const absl::Time timestamp() const { return timestamp_; } 282 283 private: 284 std::vector<Plane> planes_; 285 std::map<std::string, absl::any> tags_; 286 Dimension dimension_; 287 Format format_; 288 Orientation orientation_; 289 absl::Time timestamp_; 290 }; 291 292 } // namespace vision 293 } // namespace task 294 } // namespace tflite 295 296 #endif // TENSORFLOW_LITE_SUPPORT_CC_TASK_VISION_CORE_FRAME_BUFFER_H_ 297