• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_SUPPORT_CC_TASK_VISION_CORE_FRAME_BUFFER_H_
17 #define TENSORFLOW_LITE_SUPPORT_CC_TASK_VISION_CORE_FRAME_BUFFER_H_
18 
19 #include <map>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 #include "absl/memory/memory.h"
26 #include "absl/status/status.h"
27 #include "absl/strings/str_cat.h"
28 #include "absl/time/clock.h"
29 #include "absl/time/time.h"
30 #include "absl/types/any.h"
31 #include "absl/types/optional.h"
32 #include "tensorflow_lite_support/cc/port/integral_types.h"
33 #include "tensorflow_lite_support/cc/port/statusor.h"
34 
35 namespace tflite {
36 namespace task {
37 namespace vision {
38 
39 // A `FrameBuffer` provides a view into the provided backing buffer (e.g. camera
40 // frame or still image) with buffer format information. FrameBuffer doesn't
41 // take ownership of the provided backing buffer. The caller is responsible to
42 // manage the backing buffer lifecycle for the lifetime of the FrameBuffer.
43 //
44 // FrameBuffer also provides a tagging system to allow the client of FrameBuffer
45 // to attach arbitrary tags to an instance. The tagging system is meant for
46 // small set of metadata. FrameBuffer does not use the tags in any way. The
47 // uniqueness of the tag is only guarded by the uniqueness of the key.
48 // The tag is useful when the uniqueness of a FrameBuffer can not be determined
49 // by its associated metadata. For example, there are two FrameBuffer instances
50 // with the same metadata (size dimension, orientation, format, etc) but one is
51 // generated through cropping of Frame A and another is generated by resizing of
52 // Frame A. The client can tag one of the generated FrameBuffer to distinguish
53 // the difference.
54 //
55 // Examples:
56 //
57 // // Create a metadata instance with no backing buffer.
58 // auto buffer = FrameBuffer::Create(/*planes=*/{}, dimension, kRGBA,
59 //                                   kTopLeft);
60 //
61 // // Create an RGBA instance with backing buffer on single plane.
62 // FrameBuffer::Plane plane = {rgba_buffer, /*stride=*/{dimension.width * 4, 4}};
63 // auto buffer = FrameBuffer::Create({plane}, dimension, kRGBA, kTopLeft);
64 //
65 // // Create a YUV instance with planar backing buffer.
66 // FrameBuffer::Plane y_plane = {y_buffer, /*stride=*/{dimension.width , 1}};
67 // FrameBuffer::Plane uv_plane = {u_buffer, /*stride=*/{dimension.width, 2}};
68 // auto buffer = FrameBuffer::Create({y_plane, uv_plane}, dimension, kNV21,
69 //                                   kLeftTop);
70 //
71 // // Add / retrieve tags from a FrameBuffer instance.
72 // buffer->InsertTag("my_special_key", 1);
73 // buffer->GetTag("my_special_key");
74 //
75 class FrameBuffer {
76  public:
77   // Colorspace formats.
78   enum class Format { kRGBA, kRGB, kNV12, kNV21, kYV12, kYV21, kGRAY };
79 
80   // Stride information.
81   struct Stride {
82     // The row stride in bytes. This is the distance between the start pixels of
83     // two consecutive rows in the image.
84     int row_stride_bytes;
85     // This is the distance between two consecutive pixel values in a row of
86     // pixels in bytes. It may be larger than the size of a single pixel to
87     // account for interleaved image data or padded formats.
88     int pixel_stride_bytes;
89   };
90 
91   // YUV data structure.
92   struct YuvData {
93     const uint8* y_buffer;
94     const uint8* u_buffer;
95     const uint8* v_buffer;
96     // Y buffer row stride in bytes.
97     int y_row_stride;
98     // U/V buffer row stride in bytes.
99     int uv_row_stride;
100     // U/V pixel stride in bytes. This is the distance between two consecutive
101     // u/v pixel values in a row.
102     int uv_pixel_stride;
103   };
104 
105   // FrameBuffer content orientation follows EXIF specification. The name of
106   // each enum value defines the position of the 0th row and the 0th column of
107   // the image content. See http://jpegclub.org/exif_orientation.html for
108   // details.
109   enum class Orientation {
110     kTopLeft = 1,
111     kTopRight = 2,
112     kBottomRight = 3,
113     kBottomLeft = 4,
114     kLeftTop = 5,
115     kRightTop = 6,
116     kRightBottom = 7,
117     kLeftBottom = 8
118   };
119 
120   // Plane encapsulates buffer and stride information.
121   struct Plane {
122     const uint8* buffer;
123     Stride stride;
124   };
125 
126   // Dimension information for the whole frame or a cropped portion of it.
127   struct Dimension {
128     // The width dimension in pixel unit.
129     int width;
130     // The height dimension in pixel unit.
131     int height;
132 
133     bool operator==(const Dimension& other) const {
134       return width == other.width && height == other.height;
135     }
136 
137     bool operator!=(const Dimension& other) const {
138       return width != other.width || height != other.height;
139     }
140 
141     bool operator>=(const Dimension& other) const {
142       return width >= other.width && height >= other.height;
143     }
144 
145     bool operator<=(const Dimension& other) const {
146       return width <= other.width && height <= other.height;
147     }
148 
149     // Swaps width and height.
SwapDimension150     void Swap() {
151       using std::swap;
152       swap(width, height);
153     }
154 
155     // Returns area represented by width * height.
SizeDimension156     int Size() const { return width * height; }
157   };
158 
159   // Factory method for creating a FrameBuffer object from row-major backing
160   // buffers. In a streaming use case (e.g continuous camera stream), the
161   // timestamp can be used as an ID to identify a frame.
Create(const std::vector<Plane> & planes,Dimension dimension,Format format,Orientation orientation,absl::Time timestamp)162   static std::unique_ptr<FrameBuffer> Create(const std::vector<Plane>& planes,
163                                              Dimension dimension, Format format,
164                                              Orientation orientation,
165                                              absl::Time timestamp) {
166     return absl::make_unique<FrameBuffer>(planes, dimension, format,
167                                           orientation, timestamp);
168   }
169 
170   // Factory method for creating a FrameBuffer object from row-major movable
171   // backing buffers. In a streaming use case (e.g continuous camera stream),
172   // the timestamp can be used as an ID to identify a frame.
Create(std::vector<Plane> && planes,Dimension dimension,Format format,Orientation orientation,absl::Time timestamp)173   static std::unique_ptr<FrameBuffer> Create(std::vector<Plane>&& planes,
174                                              Dimension dimension, Format format,
175                                              Orientation orientation,
176                                              absl::Time timestamp) {
177     return absl::make_unique<FrameBuffer>(std::move(planes), dimension, format,
178                                           orientation, timestamp);
179   }
180 
181   // Factory method for creating a FrameBuffer object from row-major backing
182   // buffers. By default this method set the timestamp to now. This method is
183   // more suitable for processing use case that does not need to re-identify
184   // this buffer.
Create(const std::vector<Plane> & planes,Dimension dimension,Format format,Orientation orientation)185   static std::unique_ptr<FrameBuffer> Create(const std::vector<Plane>& planes,
186                                              Dimension dimension, Format format,
187                                              Orientation orientation) {
188     return absl::make_unique<FrameBuffer>(planes, dimension, format,
189                                           orientation, absl::Now());
190   }
191 
192   // Factory method for creating a FrameBuffer object from movable row-major
193   // backing buffers. By default this method set the timestamp to now. This
194   // method is more suitable for processing use case that does not need to
195   // re-identify this buffer.
Create(std::vector<Plane> && planes,Dimension dimension,Format format,Orientation orientation)196   static std::unique_ptr<FrameBuffer> Create(std::vector<Plane>&& planes,
197                                              Dimension dimension, Format format,
198                                              Orientation orientation) {
199     return absl::make_unique<FrameBuffer>(std::move(planes), dimension, format,
200                                           orientation, absl::Now());
201   }
202 
203   // Returns YuvData which contains the Y, U, and V buffer and their
204   // stride info from the input `source` FrameBuffer which is in the YUV family
205   // formats (e.g NV12, NV21, YV12, and YV21).
206   static tflite::support::StatusOr<YuvData> GetYuvDataFromFrameBuffer(
207       const FrameBuffer& source);
208 
209   // Builds a FrameBuffer object from a row-major backing buffer.
210   //
211   // The FrameBuffer does not take ownership of the backing buffer. The backing
212   // buffer is read-only and the caller is responsible for maintaining the
213   // backing buffer lifecycle for the lifetime of FrameBuffer.
FrameBuffer(const std::vector<Plane> & planes,Dimension dimension,Format format,Orientation orientation,absl::Time timestamp)214   FrameBuffer(const std::vector<Plane>& planes, Dimension dimension,
215               Format format, Orientation orientation, absl::Time timestamp)
216       : planes_(planes),
217         dimension_(dimension),
218         format_(format),
219         orientation_(orientation),
220         timestamp_(timestamp) {}
221 
222   // Builds a FrameBuffer object from a movable row-major backing buffer.
223   //
224   // The FrameBuffer does not take ownership of the backing buffer. The backing
225   // buffer is read-only and the caller is responsible for maintaining the
226   // backing buffer lifecycle for the lifetime of FrameBuffer.
FrameBuffer(std::vector<Plane> && planes,Dimension dimension,Format format,Orientation orientation,absl::Time timestamp)227   FrameBuffer(std::vector<Plane>&& planes, Dimension dimension, Format format,
228               Orientation orientation, absl::Time timestamp)
229       : planes_(std::move(planes)),
230         dimension_(dimension),
231         format_(format),
232         orientation_(orientation),
233         timestamp_(timestamp) {}
234 
235   // Returns number of planes.
plane_count()236   const int plane_count() const { return planes_.size(); }
237 
238   // Returns plane indexed by the input `index`.
plane(int index)239   const Plane plane(int index) const {
240     if (index > -1 && index < planes_.size()) {
241       return planes_[index];
242     }
243     return {};
244   }
245 
246   // Returns the tag associated to the tag_key.
GetTag(const std::string & tag_key)247   absl::any GetTag(const std::string& tag_key) const {
248     auto iter = tags_.find(tag_key);
249     if (iter != tags_.end()) {
250       return iter->second;
251     }
252     return absl::any();
253   }
254 
255   // Inserts or updates the tags map with key value pair (tag_key, tag_value).
InsertOrUpdateTag(const std::string & tag_key,absl::any tag_value)256   void InsertOrUpdateTag(const std::string& tag_key, absl::any tag_value) {
257     tags_[tag_key] = std::move(tag_value);
258   }
259 
260   // Inserts the key value pair (tag_key, tag_value) into tags map. If the
261   // tag_key already exists, an internal error will return.
InsertTag(const std::string & tag_key,absl::any tag_value)262   absl::Status InsertTag(const std::string& tag_key, absl::any tag_value) {
263     auto iter = tags_.emplace(tag_key, tag_value);
264     if (iter.second) {
265       return absl::OkStatus();
266     }
267     return absl::InternalError(absl::StrCat(
268         "tag_key already exists in tags.tag_key was not inserted: ", tag_key));
269   }
270 
271   // Returns FrameBuffer dimension.
dimension()272   const Dimension dimension() const { return dimension_; }
273 
274   // Returns FrameBuffer format.
format()275   const Format format() const { return format_; }
276 
277   // Returns FrameBuffer orientation.
orientation()278   const Orientation orientation() const { return orientation_; }
279 
280   // Returns FrameBuffer timestamp.
timestamp()281   const absl::Time timestamp() const { return timestamp_; }
282 
283  private:
284   std::vector<Plane> planes_;
285   std::map<std::string, absl::any> tags_;
286   Dimension dimension_;
287   Format format_;
288   Orientation orientation_;
289   absl::Time timestamp_;
290 };
291 
292 }  // namespace vision
293 }  // namespace task
294 }  // namespace tflite
295 
296 #endif  // TENSORFLOW_LITE_SUPPORT_CC_TASK_VISION_CORE_FRAME_BUFFER_H_
297