• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 // Note: ported from Chromium commit head: 85fdf90
5 
6 #ifndef V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
7 #define V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
8 
9 #include <stddef.h>
10 #include <stdint.h>
11 
12 #include <memory>
13 #include <queue>
14 #include <utility>
15 #include <vector>
16 
17 #include "base/macros.h"
18 #include "base/memory/linked_ptr.h"
19 #include "base/memory/ref_counted.h"
20 #include "base/memory/weak_ptr.h"
21 #include "base/synchronization/waitable_event.h"
22 #include "base/threading/thread.h"
23 #include "h264_decoder.h"
24 #include "v4l2_device.h"
25 #include "video_decode_accelerator.h"
26 #include "videodev2.h"
27 #include "vp8_decoder.h"
28 #include "vp9_decoder.h"
29 
30 namespace media {
31 
32 // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice
33 // level codec API for decoding. The slice level API provides only a low-level
34 // decoding functionality and requires userspace to provide support for parsing
35 // the input stream and managing decoder state across frames.
   //
   // Threading, per the member comments in this class: client callbacks are made
   // on child_task_runner_ (or on decode_task_runner_ for |decode_client_|), the
   // device is communicated with on decoder_thread_, and device_poll_thread_ is
   // used to poll the device for events.
36 class V4L2SliceVideoDecodeAccelerator
37     : public VideoDecodeAccelerator {
38  public:
39   class V4L2DecodeSurface;
40 
     // |device| is the V4L2 device to decode with (presumably stored in device_
     // -- confirm against the implementation).
     // NOTE(review): this single-argument constructor is not marked explicit.
41   V4L2SliceVideoDecodeAccelerator(
42       const scoped_refptr<V4L2Device>& device);
43   ~V4L2SliceVideoDecodeAccelerator() override;
44 
45   // VideoDecodeAccelerator implementation.
46   bool Initialize(const Config& config, Client* client) override;
47   void Decode(const BitstreamBuffer& bitstream_buffer) override;
48   void AssignPictureBuffers(const std::vector<PictureBuffer>& buffers) override;
49   void ImportBufferForPicture(
50       int32_t picture_buffer_id,
51       VideoPixelFormat pixel_format,
52       const NativePixmapHandle& native_pixmap_handle) override;
53   void ReusePictureBuffer(int32_t picture_buffer_id) override;
54   void Flush() override;
55   void Reset() override;
56   void Destroy() override;
57   bool TryToSetupDecodeOnSeparateThread(
58       const base::WeakPtr<Client>& decode_client,
59       const scoped_refptr<base::SingleThreadTaskRunner>& decode_task_runner)
60       override;
61 
     // Returns the supported decode profiles. Static, so it can be called without
     // an instance of this class.
62   static VideoDecodeAccelerator::SupportedProfiles GetSupportedProfiles();
63 
64  private:
     // Codec-specific accelerator classes, only forward-declared in this header;
     // instances are held in h264_accelerator_, vp8_accelerator_ and
     // vp9_accelerator_ below.
65   class V4L2H264Accelerator;
66   class V4L2VP8Accelerator;
67   class V4L2VP9Accelerator;
68 
69   // Record for input buffers.
     // NOTE(review): the fields are not documented in this header. Presumably
     // |address| and |length| describe the buffer's memory, |bytes_used| the
     // amount of data currently stored in it, |input_id| the id of the bitstream
     // buffer it carries and |at_device| whether it is queued to the device --
     // confirm against the implementation.
70   struct InputRecord {
71     InputRecord();
72     int32_t input_id;
73     void* address;
74     size_t length;
75     size_t bytes_used;
76     bool at_device;
77   };
78 
79   // Record for output buffers.
     // Only a defaulted move constructor is declared, which suppresses the
     // implicit copy constructor, so records are moved rather than copied.
     // NOTE(review): presumably |at_device| / |at_client| track who currently
     // holds the buffer, |picture_id| is the id of the associated PictureBuffer,
     // |dmabuf_fds| are the descriptors taken over in
     // ImportBufferForPictureTask() and |cleared| relates to PictureCleared() --
     // confirm against the implementation.
80   struct OutputRecord {
81     OutputRecord();
82     OutputRecord(OutputRecord&&) = default;
83     bool at_device;
84     bool at_client;
85     int32_t picture_id;
86     std::vector<base::ScopedFD> dmabuf_fds;
87     bool cleared;
88   };
89 
90   // See http://crbug.com/255116.
     // NOTE(review): the three constants below are non-static const data
     // members, i.e. every instance stores its own copy.
91   // Input bitstream buffer size for up to 1080p streams.
92   const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024;
93   // Input bitstream buffer size for up to 4k streams.
94   const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p;
     // Number of input buffers (presumably the count requested by
     // CreateInputBuffers() -- confirm).
95   const size_t kNumInputBuffers = 16;
96 
97   // Input format V4L2 fourccs this class supports.
98   static const uint32_t supported_input_fourccs_[];
99 
100   //
101   // Below methods are used by accelerator implementations.
102   //
103   // Append slice data in |data| of size |size| to pending hardware
104   // input buffer with |index|. This buffer will be submitted for decode
105   // on the next DecodeSurface(). Return true on success.
106   bool SubmitSlice(int index, const uint8_t* data, size_t size);
107 
108   // Submit controls in |ext_ctrls| to hardware. Return true on success.
109   bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls);
110 
111   // Gets current control values for controls in |ext_ctrls| from the driver.
112   // Return true on success.
113   bool GetExtControls(struct v4l2_ext_controls* ext_ctrls);
114 
115   // Return true if the driver exposes V4L2 control |ctrl_id|, false otherwise.
116   bool IsCtrlExposed(uint32_t ctrl_id);
117 
118   // Decode of |dec_surface| is ready to be submitted and all codec-specific
119   // settings are set in hardware.
120   void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
121 
122   // |dec_surface| is ready to be outputted once decode is finished.
123   // This can be called before decode is actually done in hardware, and this
124   // method is responsible for maintaining the ordering, i.e. the surfaces will
125   // be outputted in the same order as SurfaceReady calls. To do so, the
126   // surfaces are put on decoder_display_queue_ and sent to output in that
127   // order once all preceding surfaces are sent.
128   void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
129 
130   //
131   // Internal methods of this class.
132   //
133   // Recycle a V4L2 input buffer with |index| after dequeuing from device.
134   void ReuseInputBuffer(int index);
135 
136   // Recycle V4L2 output buffer with |index|. Used as surface release callback.
137   void ReuseOutputBuffer(int index);
138 
139   // Queue a |dec_surface| to device for decoding.
140   void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
141 
142   // Dequeue any V4L2 buffers available and process.
143   void Dequeue();
144 
145   // V4L2 QBUF helpers.
      // |index| selects the input/output record to queue. The bool results
      // presumably report whether the QBUF succeeded -- confirm.
146   bool EnqueueInputRecord(int index, uint32_t config_store);
147   bool EnqueueOutputRecord(int index);
148 
149   // Set input and output formats in hardware.
150   bool SetupFormats();
151 
152   // Create input and output buffers.
153   bool CreateInputBuffers();
154   bool CreateOutputBuffers();
155 
156   // Destroy input buffers.
157   void DestroyInputBuffers();
158 
159   // Destroy output buffers. If |dismiss| is true, also dismiss the
160   // associated PictureBuffers.
      // The bool result presumably reports success -- confirm.
161   bool DestroyOutputs(bool dismiss);
162 
163   // Used by DestroyOutputs.
164   bool DestroyOutputBuffers();
165 
166   // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer()
167   // and signal |done| after finishing.
168   void DismissPictures(const std::vector<int32_t>& picture_buffer_ids,
169                        base::WaitableEvent* done);
170 
171   // Task to finish initialization on decoder_thread_.
172   void InitializeTask();
173 
      // Report |error| to the client (see the kError state below). The thread
      // this runs on is not stated here; client callbacks are documented as
      // child_task_runner_-only, so presumably it hops there -- confirm.
174   void NotifyError(Error error);
      // Task counterpart of Destroy(); presumably run on decoder_thread_ like
      // the other *Task methods -- confirm.
175   void DestroyTask();
176 
177   // Sets the state to kError and notifies client if needed.
178   void SetErrorState(Error error);
179 
180   // Event handling. Events include flush, reset and resolution change and are
181   // processed while in kIdle state.
182 
183   // Surface set change (resolution change) flow.
184   // If we have no surfaces allocated, start it immediately, otherwise mark
185   // ourselves as pending for surface set change.
186   void InitiateSurfaceSetChange();
187   // If a surface set change is pending and we are ready, stop the device,
188   // destroy outputs, releasing resources and dismissing pictures as required,
189   // followed by starting the flow to allocate a new set for the current
190   // resolution/DPB size, as provided by decoder.
191   bool FinishSurfaceSetChange();
192 
193   // Flush flow when requested by client.
194   // When Flush() is called, it posts a FlushTask, which checks the input queue.
195   // If nothing is pending for decode on decoder_input_queue_, we call
196   // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef
197   // onto the decoder_input_queue_ to schedule a flush. When we reach it later
198   // on, we call InitiateFlush() to perform it at the correct time.
199   void FlushTask();
200   // Tell the decoder to flush all frames, reset it and mark us as scheduled
201   // for flush, so that we can finish it once all pending decodes are finished.
202   void InitiateFlush();
203   // To be called if decoder_flushing_ is true. If not all pending frames are
204   // decoded, return false, requesting the caller to try again later.
205   // Otherwise perform flush by sending all pending pictures to the client,
206   // notify it that flush is finished and return true, informing the caller
207   // that further progress can be made.
208   bool FinishFlush();
209 
210   // Reset flow when requested by client.
211   // Drop all inputs, reset the decoder and mark us as pending for reset.
212   void ResetTask();
213   // To be called if decoder_resetting_ is true. If not all pending frames are
214   // decoded, return false, requesting the caller to try again later.
215   // Otherwise perform reset by dropping all pending outputs (client is not
216   // interested anymore), notifying it that reset is finished, and return true,
217   // informing the caller that further progress can be made.
218   bool FinishReset();
219 
220   // Called when a new event is pended. Transitions us into kIdle state (if not
221   // already in it), if possible. Also starts processing events.
222   void NewEventPending();
223 
224   // Called after all events are processed successfully (i.e. all Finish*()
225   // methods return true) to return to decoding state.
226   bool FinishEventProcessing();
227 
228   // Process pending events, if any.
229   void ProcessPendingEventsIfNeeded();
230 
231   // Allocate V4L2 buffers and assign them to |buffers| provided by the client
232   // via AssignPictureBuffers() on decoder thread.
233   void AssignPictureBuffersTask(const std::vector<PictureBuffer>& buffers);
234 
235   // Use buffer backed by dmabuf file descriptors in |passed_dmabuf_fds| for the
236   // OutputRecord associated with |picture_buffer_id|, taking ownership of the
237   // file descriptors.
238   void ImportBufferForPictureTask(
239       int32_t picture_buffer_id,
240       // TODO(posciak): (https://crbug.com/561749) we should normally be able to
241       // pass the vector by itself via std::move, but it's not possible to do
242       // this if this method is used as a callback.
243       std::unique_ptr<std::vector<base::ScopedFD>> passed_dmabuf_fds);
244 
245   // Performed on decoder_thread_ as a consequence of poll() on decoder_thread_
246   // returning an event.
      // NOTE(review): DevicePollTask() below is documented as running on
      // device_poll_thread_, so the second "decoder_thread_" above presumably
      // should read "device_poll_thread_" -- confirm.
247   void ServiceDeviceTask();
248 
249   // Schedule poll if we have any buffers queued and the poll thread
250   // is not stopped (on surface set change).
251   void SchedulePollIfNeeded();
252 
253   // Attempt to start/stop device_poll_thread_.
      // |keep_input_state| presumably preserves the input queue state across the
      // stop -- confirm against the implementation.
254   bool StartDevicePoll();
255   bool StopDevicePoll(bool keep_input_state);
256 
257   // Run on device_poll_thread_ to wait for device events.
      // |poll_device| presumably selects whether the device itself is polled --
      // confirm against the implementation.
258   void DevicePollTask(bool poll_device);
259 
      // Decoder state machine; the current value is kept in state_.
260   enum State {
261     // We are in this state until Initialize() returns successfully.
262     // We can't post errors to the client in this state yet.
263     kUninitialized,
264     // Initialize() returned successfully.
265     kInitialized,
266     // This state allows making progress decoding more input stream.
267     kDecoding,
268     // Transitional state when we are not decoding any more stream, but are
269     // performing flush, reset, resolution change or are destroying ourselves.
270     kIdle,
271     // Requested new PictureBuffers via ProvidePictureBuffers(), awaiting
272     // AssignPictureBuffers().
273     kAwaitingPictureBuffers,
274     // Error state, set when sending NotifyError to client.
275     kError,
276   };
277 
278   // Buffer id for flush buffer, queued by FlushTask().
279   const int kFlushBufferId = -2;
280 
281   // Handler for Decode() on decoder_thread_.
282   void DecodeTask(const BitstreamBuffer& bitstream_buffer);
283 
284   // Schedule a new DecodeBufferTask if we are decoding.
285   void ScheduleDecodeBufferTaskIfNeeded();
286 
287   // Main decoder loop. Keep decoding the current buffer in decoder_, asking
288   // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so,
289   // and handle other returns from it appropriately.
290   void DecodeBufferTask();
291 
292   // Check decoder_input_queue_ for any available buffers to decode and
293   // set the decoder_current_bitstream_buffer_ to the next buffer if one is
294   // available, taking it off the queue. Also set the current stream pointer
295   // in decoder_, and return true.
296   // Return false if no buffers are pending on decoder_input_queue_.
      // NOTE(review): "Bistream" in the method name is a misspelling of
      // "Bitstream"; renaming it is a code change and is not done here.
297   bool TrySetNewBistreamBuffer();
298 
299   // Task counterpart of ReusePictureBuffer() for |picture_buffer_id|.
      // NOTE(review): the previous comment here described an auto-destruction
      // reference for EGLSync, which this signature does not take; it looked
      // stale. The thread this runs on is presumably decoder_thread_ -- confirm.
300   void ReusePictureBufferTask(int32_t picture_buffer_id);
301 
302   // Called to actually send |dec_surface| to the client, after it is decoded
303   // preserving the order in which it was scheduled via SurfaceReady().
304   void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
305 
306   // Goes over the |decoder_display_queue_| and sends all buffers from the
307   // front of the queue that are already decoded to the client, in order.
308   void TryOutputSurfaces();
309 
310   // Creates a new decode surface or returns nullptr if one is not available.
311   scoped_refptr<V4L2DecodeSurface> CreateSurface();
312 
313   // Send decoded pictures to PictureReady.
314   void SendPictureReady();
315 
316   // Callback that indicates a picture has been cleared.
317   void PictureCleared();
318 
      // Plane counts for the input and output buffers (presumably determined
      // when the formats are set up in SetupFormats() -- confirm).
319   size_t input_planes_count_;
320   size_t output_planes_count_;
321 
322   // GPU Child thread task runner.
323   const scoped_refptr<base::SingleThreadTaskRunner> child_task_runner_;
324 
325   // Task runner Decode() and PictureReady() run on.
326   scoped_refptr<base::SingleThreadTaskRunner> decode_task_runner_;
327 
328   // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
329   // device worker threads back to the child thread.
330   base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_;
331 
332   // To expose client callbacks from VideoDecodeAccelerator.
333   // NOTE: all calls to these objects *MUST* be executed on
334   // child_task_runner_.
335   std::unique_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>>
336       client_ptr_factory_;
337   base::WeakPtr<VideoDecodeAccelerator::Client> client_;
338   // Callbacks to |decode_client_| must be executed on |decode_task_runner_|.
339   base::WeakPtr<Client> decode_client_;
340 
341   // V4L2 device in use.
342   scoped_refptr<V4L2Device> device_;
343 
344   // Thread to communicate with the device on.
345   base::Thread decoder_thread_;
346   scoped_refptr<base::SingleThreadTaskRunner> decoder_thread_task_runner_;
347 
348   // Thread used to poll the device for events.
349   base::Thread device_poll_thread_;
350 
351   // Input queue state.
352   bool input_streamon_;
353   // Number of input buffers enqueued to the device.
354   int input_buffer_queued_count_;
355   // Input buffers ready to use; LIFO since we don't care about ordering.
      // NOTE(review): std::list is used here and for free_output_buffers_, but
      // <list> is not among this header's direct includes; presumably it is
      // pulled in transitively -- confirm.
356   std::list<int> free_input_buffers_;
357   // Mapping of int index to an input buffer record.
358   std::vector<InputRecord> input_buffer_map_;
359 
360   // Output queue state.
361   bool output_streamon_;
362   // Number of output buffers enqueued to the device.
363   int output_buffer_queued_count_;
364   // Output buffers ready to use.
365   std::list<int> free_output_buffers_;
366   // Mapping of int index to an output buffer record.
367   std::vector<OutputRecord> output_buffer_map_;
368 
      // Codec profile, V4L2 input/output format fourccs and coded size in use
      // (presumably established by Initialize()/SetupFormats() -- confirm).
369   VideoCodecProfile video_profile_;
370   uint32_t input_format_fourcc_;
371   uint32_t output_format_fourcc_;
372   Size coded_size_;
373 
      // Only forward-declared in this header.
374   struct BitstreamBufferRef;
375   // Input queue of stream buffers coming from the client.
376   std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_;
377   // BitstreamBuffer currently being processed.
378   std::unique_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
379 
380   // Queue storing decode surfaces ready to be output as soon as they are
381   // decoded. The surfaces must be output in order they are queued.
382   std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_;
383 
384   // Decoder state.
385   State state_;
386 
      // Output mode (presumably taken from the Config passed to Initialize() --
      // confirm).
387   Config::OutputMode output_mode_;
388 
389   // If any of these are true, we are waiting for the device to finish decoding
390   // all previously-queued frames, so we can finish the flush/reset/surface
391   // change flows. These can stack.
392   bool decoder_flushing_;
393   bool decoder_resetting_;
394   bool surface_set_change_pending_;
395 
396   // Hardware accelerators.
397   // TODO(posciak): Try to have a superclass here if possible.
398   std::unique_ptr<V4L2H264Accelerator> h264_accelerator_;
399   std::unique_ptr<V4L2VP8Accelerator> vp8_accelerator_;
400   std::unique_ptr<V4L2VP9Accelerator> vp9_accelerator_;
401 
402   // Codec-specific software decoder in use.
403   std::unique_ptr<AcceleratedVideoDecoder> decoder_;
404 
405   // Surfaces queued to device to keep references to them while decoded.
      // NOTE(review): std::map is used here and below, but <map> is not among
      // this header's direct includes; presumably it is pulled in transitively
      // -- confirm.
406   using V4L2DecodeSurfaceByOutputId =
407       std::map<int, scoped_refptr<V4L2DecodeSurface>>;
408   V4L2DecodeSurfaceByOutputId surfaces_at_device_;
409 
410   // Surfaces sent to client to keep references to them while displayed.
411   using V4L2DecodeSurfaceByPictureBufferId =
412       std::map<int32_t, scoped_refptr<V4L2DecodeSurface>>;
413   V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;
414 
415   // Record for decoded pictures that can be sent to PictureReady.
416   struct PictureRecord {
417     PictureRecord(bool cleared, const Picture& picture);
418     ~PictureRecord();
419     bool cleared;  // Whether the texture is cleared and safe to render from.
420     Picture picture;  // The decoded picture.
421   };
422 
423   // Pictures that are ready but not sent to PictureReady yet.
424   std::queue<PictureRecord> pending_picture_ready_;
425 
426   // The number of pictures that are sent to PictureReady and will be cleared.
427   int picture_clearing_count_;
428 
429   // The WeakPtrFactory for |weak_this_|.
      // This is the last data member declared, so it is the first one destroyed;
      // presumably that is deliberate so outstanding WeakPtrs are invalidated
      // before the other members go away -- keep it last.
430   base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_;
431 
432   DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator);
433 };
434 
435 }  // namespace media
436 
437 #endif  // V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
438