• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
6 #define V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <memory>
12 #include <queue>
13 #include <utility>
14 #include <vector>
15 
16 #include "base/macros.h"
17 #include "base/memory/linked_ptr.h"
18 #include "base/memory/ref_counted.h"
19 #include "base/memory/weak_ptr.h"
20 #include "base/synchronization/waitable_event.h"
21 #include "base/threading/thread.h"
22 #include "h264_decoder.h"
23 #include "v4l2_device.h"
24 #include "video_decode_accelerator.h"
25 #include "videodev2.h"
26 #include "vp8_decoder.h"
27 #include "vp9_decoder.h"
28 
29 namespace media {
30 
31 // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice
32 // level codec API for decoding. The slice level API provides only low-level
33 // decoding functionality and requires userspace to provide support for parsing
34 // the input stream and managing decoder state across frames.
35 class V4L2SliceVideoDecodeAccelerator
36     : public VideoDecodeAccelerator {
37  public:
38   class V4L2DecodeSurface;
39 
40   V4L2SliceVideoDecodeAccelerator(
41       const scoped_refptr<V4L2Device>& device);
42   ~V4L2SliceVideoDecodeAccelerator() override;
43 
44   // VideoDecodeAccelerator implementation.
45   bool Initialize(const Config& config, Client* client) override;
46   void Decode(const BitstreamBuffer& bitstream_buffer) override;
47   void AssignPictureBuffers(const std::vector<PictureBuffer>& buffers) override;
48   void ImportBufferForPicture(
49       int32_t picture_buffer_id,
50       const std::vector<base::FileDescriptor>& dmabuf_fds) override;
51   void ReusePictureBuffer(int32_t picture_buffer_id) override;
52   void Flush() override;
53   void Reset() override;
54   void Destroy() override;
55   bool TryToSetupDecodeOnSeparateThread(
56       const base::WeakPtr<Client>& decode_client,
57       const scoped_refptr<base::SingleThreadTaskRunner>& decode_task_runner)
58       override;
59 
60   static VideoDecodeAccelerator::SupportedProfiles GetSupportedProfiles();
61 
62  private:
63   class V4L2H264Accelerator;
64   class V4L2VP8Accelerator;
65   class V4L2VP9Accelerator;
66 
67   // Record for input buffers.
68   struct InputRecord {
69     InputRecord();
70     int32_t input_id;
71     void* address;
72     size_t length;
73     size_t bytes_used;
74     bool at_device;
75   };
76 
77   // Record for output buffers.
78   struct OutputRecord {
79     OutputRecord();
80     bool at_device;
81     bool at_client;
82     int32_t picture_id;
83     std::vector<base::ScopedFD> dmabuf_fds;
84     bool cleared;
85   };
86 
87   // See http://crbug.com/255116.
88   // Input bitstream buffer size for up to 1080p streams.
89   const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024;
90   // Input bitstream buffer size for up to 4k streams.
91   const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p;
92   const size_t kNumInputBuffers = 16;
93 
94   // Input format V4L2 fourccs this class supports.
95   static const uint32_t supported_input_fourccs_[];
96 
97   //
98   // Below methods are used by accelerator implementations.
99   //
100   // Append slice data in |data| of size |size| to pending hardware
101   // input buffer with |index|. This buffer will be submitted for decode
102   // on the next DecodeSurface(). Return true on success.
103   bool SubmitSlice(int index, const uint8_t* data, size_t size);
104 
105   // Submit controls in |ext_ctrls| to hardware. Return true on success.
106   bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls);
107 
108   // Gets current control values for controls in |ext_ctrls| from the driver.
109   // Return true on success.
110   bool GetExtControls(struct v4l2_ext_controls* ext_ctrls);
111 
112   // Return true if the driver exposes V4L2 control |ctrl_id|, false otherwise.
113   bool IsCtrlExposed(uint32_t ctrl_id);
114 
115   // Decode of |dec_surface| is ready to be submitted and all codec-specific
116   // settings are set in hardware.
117   void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
118 
119   // |dec_surface| is ready to be outputted once decode is finished.
120   // This can be called before decode is actually done in hardware, and this
121   // method is responsible for maintaining the ordering, i.e. the surfaces will
122   // be outputted in the same order as SurfaceReady calls. To do so, the
123   // surfaces are put on decoder_display_queue_ and sent to output in that
124   // order once all preceding surfaces are sent.
125   void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
126 
127   //
128   // Internal methods of this class.
129   //
130   // Recycle a V4L2 input buffer with |index| after dequeuing from device.
131   void ReuseInputBuffer(int index);
132 
133   // Recycle V4L2 output buffer with |index|. Used as surface release callback.
134   void ReuseOutputBuffer(int index);
135 
136   // Queue a |dec_surface| to device for decoding.
137   void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
138 
139   // Dequeue any V4L2 buffers available and process.
140   void Dequeue();
141 
142   // V4L2 QBUF helpers.
143   bool EnqueueInputRecord(int index, uint32_t config_store);
144   bool EnqueueOutputRecord(int index);
145 
146   // Set input and output formats in hardware.
147   bool SetupFormats();
148 
149   // Create input and output buffers.
150   bool CreateInputBuffers();
151   bool CreateOutputBuffers();
152 
153   // Destroy input buffers.
154   void DestroyInputBuffers();
155 
156   // Destroy output buffers. If |dismiss| is true, also dismissing the
157   // associated PictureBuffers.
158   bool DestroyOutputs(bool dismiss);
159 
160   // Used by DestroyOutputs.
161   bool DestroyOutputBuffers();
162 
163   // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer()
164   // and signal |done| after finishing.
165   void DismissPictures(const std::vector<int32_t>& picture_buffer_ids,
166                        base::WaitableEvent* done);
167 
168   // Task to finish initialization on decoder_thread_.
169   void InitializeTask();
170 
171   void NotifyError(Error error);
172   void DestroyTask();
173 
174   // Sets the state to kError and notifies client if needed.
175   void SetErrorState(Error error);
176 
177   // Event handling. Events include flush, reset and resolution change and are
178   // processed while in kIdle state.
179 
180   // Surface set change (resolution change) flow.
181   // If we have no surfaces allocated, start it immediately, otherwise mark
182   // ourselves as pending for surface set change.
183   void InitiateSurfaceSetChange();
184   // If a surface set change is pending and we are ready, stop the device,
185   // destroy outputs, releasing resources and dismissing pictures as required,
186   // followed by starting the flow to allocate a new set for the current
187   // resolution/DPB size, as provided by decoder.
188   bool FinishSurfaceSetChange();
189 
190   // Flush flow when requested by client.
191   // When Flush() is called, it posts a FlushTask, which checks the input queue.
192   // If nothing is pending for decode on decoder_input_queue_, we call
193   // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef
194   // onto the decoder_input_queue_ to schedule a flush. When we reach it later
195   // on, we call InitiateFlush() to perform it at the correct time.
196   void FlushTask();
197   // Tell the decoder to flush all frames, reset it and mark us as scheduled
198   // for flush, so that we can finish it once all pending decodes are finished.
199   void InitiateFlush();
200   // To be called if decoder_flushing_ is true. If not all pending frames are
201   // decoded, return false, requesting the caller to try again later.
202   // Otherwise perform flush by sending all pending pictures to the client,
203   // notify it that flush is finished and return true, informing the caller
204   // that further progress can be made.
205   bool FinishFlush();
206 
207   // Reset flow when requested by client.
208   // Drop all inputs, reset the decoder and mark us as pending for reset.
209   void ResetTask();
210   // To be called if decoder_resetting_ is true. If not all pending frames are
211   // decoded, return false, requesting the caller to try again later.
212   // Otherwise perform reset by dropping all pending outputs (client is not
213   // interested anymore), notifying it that reset is finished, and return true,
214   // informing the caller that further progress can be made.
215   bool FinishReset();
216 
217   // Called when a new event is pended. Transitions us into kIdle state (if not
218   // already in it), if possible. Also starts processing events.
219   void NewEventPending();
220 
221   // Called after all events are processed successfully (i.e. all Finish*()
222   // methods return true) to return to decoding state.
223   bool FinishEventProcessing();
224 
225   // Process pending events, if any.
226   void ProcessPendingEventsIfNeeded();
227 
228   // Allocate V4L2 buffers and assign them to |buffers| provided by the client
229   // via AssignPictureBuffers() on decoder thread.
230   void AssignPictureBuffersTask(const std::vector<PictureBuffer>& buffers);
231 
232   // Use buffer backed by dmabuf file descriptors in |passed_dmabuf_fds| for the
233   // OutputRecord associated with |picture_buffer_id|, taking ownership of the
234   // file descriptors.
235   void ImportBufferForPictureTask(
236       int32_t picture_buffer_id,
237       // TODO(posciak): (crbug.com/561749) we should normally be able to pass
238       // the vector by itself via std::move, but it's not possible to do this
239       // if this method is used as a callback.
240       std::unique_ptr<std::vector<base::ScopedFD>> passed_dmabuf_fds);
241 
242   // Performed on decoder_thread_ as a consequence of poll() on decoder_thread_
243   // returning an event.
244   void ServiceDeviceTask();
245 
246   // Schedule poll if we have any buffers queued and the poll thread
247   // is not stopped (on surface set change).
248   void SchedulePollIfNeeded();
249 
250   // Attempt to start/stop device_poll_thread_.
251   bool StartDevicePoll();
252   bool StopDevicePoll(bool keep_input_state);
253 
254   // Ran on device_poll_thread_ to wait for device events.
255   void DevicePollTask(bool poll_device);
256 
257   enum State {
258     // We are in this state until Initialize() returns successfully.
259     // We can't post errors to the client in this state yet.
260     kUninitialized,
261     // Initialize() returned successfully.
262     kInitialized,
263     // This state allows making progress decoding more input stream.
264     kDecoding,
265     // Transitional state when we are not decoding any more stream, but are
266     // performing flush, reset, resolution change or are destroying ourselves.
267     kIdle,
268     // Requested new PictureBuffers via ProvidePictureBuffers(), awaiting
269     // AssignPictureBuffers().
270     kAwaitingPictureBuffers,
271     // Error state, set when sending NotifyError to client.
272     kError,
273   };
274 
275   // Buffer id for flush buffer, queued by FlushTask().
276   const int kFlushBufferId = -2;
277 
278   // Handler for Decode() on decoder_thread_.
279   void DecodeTask(const BitstreamBuffer& bitstream_buffer);
280 
281   // Schedule a new DecodeBufferTask if we are decoding.
282   void ScheduleDecodeBufferTaskIfNeeded();
283 
284   // Main decoder loop. Keep decoding the current buffer in decoder_, asking
285   // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so,
286   // and handle other returns from it appropriately.
287   void DecodeBufferTask();
288 
289   // Check decoder_input_queue_ for any available buffers to decode and
290   // set the decoder_current_bitstream_buffer_ to the next buffer if one is
291   // available, taking it off the queue. Also set the current stream pointer
292   // in decoder_, and return true.
293   // Return false if no buffers are pending on decoder_input_queue_.
294   bool TrySetNewBistreamBuffer();
295 
296   // Auto-destruction reference for EGLSync (for message-passing).
297   void ReusePictureBufferTask(int32_t picture_buffer_id);
298 
299   // Called to actually send |dec_surface| to the client, after it is decoded
300   // preserving the order in which it was scheduled via SurfaceReady().
301   void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
302 
303   // Goes over the |decoder_display_queue_| and sends all buffers from the
304   // front of the queue that are already decoded to the client, in order.
305   void TryOutputSurfaces();
306 
307   // Creates a new decode surface or returns nullptr if one is not available.
308   scoped_refptr<V4L2DecodeSurface> CreateSurface();
309 
310   // Send decoded pictures to PictureReady.
311   void SendPictureReady();
312 
313   // Callback that indicates a picture has been cleared.
314   void PictureCleared();
315 
316   size_t input_planes_count_;
317   size_t output_planes_count_;
318 
319   // GPU Child thread task runner.
320   const scoped_refptr<base::SingleThreadTaskRunner> child_task_runner_;
321 
322   // Task runner Decode() and PictureReady() run on.
323   scoped_refptr<base::SingleThreadTaskRunner> decode_task_runner_;
324 
325   // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
326   // device worker threads back to the child thread.
327   base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_;
328 
329   // To expose client callbacks from VideoDecodeAccelerator.
330   // NOTE: all calls to these objects *MUST* be executed on
331   // child_task_runner_.
332   std::unique_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>>
333       client_ptr_factory_;
334   base::WeakPtr<VideoDecodeAccelerator::Client> client_;
335   // Callbacks to |decode_client_| must be executed on |decode_task_runner_|.
336   base::WeakPtr<Client> decode_client_;
337 
338   // V4L2 device in use.
339   scoped_refptr<V4L2Device> device_;
340 
341   // Thread to communicate with the device on.
342   base::Thread decoder_thread_;
343   scoped_refptr<base::SingleThreadTaskRunner> decoder_thread_task_runner_;
344 
345   // Thread used to poll the device for events.
346   base::Thread device_poll_thread_;
347 
348   // Input queue state.
349   bool input_streamon_;
350   // Number of input buffers enqueued to the device.
351   int input_buffer_queued_count_;
352   // Input buffers ready to use; LIFO since we don't care about ordering.
353   std::list<int> free_input_buffers_;
354   // Mapping of int index to an input buffer record.
355   std::vector<InputRecord> input_buffer_map_;
356 
357   // Output queue state.
358   bool output_streamon_;
359   // Number of output buffers enqueued to the device.
360   int output_buffer_queued_count_;
361   // Output buffers ready to use.
362   std::list<int> free_output_buffers_;
363   // Mapping of int index to an output buffer record.
364   std::vector<OutputRecord> output_buffer_map_;
365 
366   VideoCodecProfile video_profile_;
367   uint32_t input_format_fourcc_;
368   uint32_t output_format_fourcc_;
369   Size visible_size_;
370   Size coded_size_;
371 
372   struct BitstreamBufferRef;
373   // Input queue of stream buffers coming from the client.
374   std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_;
375   // BitstreamBuffer currently being processed.
376   std::unique_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
377 
378   // Queue storing decode surfaces ready to be output as soon as they are
379   // decoded. The surfaces must be output in order they are queued.
380   std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_;
381 
382   // Decoder state.
383   State state_;
384 
385   Config::OutputMode output_mode_;
386 
387   // If any of these are true, we are waiting for the device to finish decoding
388   // all previously-queued frames, so we can finish the flush/reset/surface
389   // change flows. These can stack.
390   bool decoder_flushing_;
391   bool decoder_resetting_;
392   bool surface_set_change_pending_;
393 
394   // Hardware accelerators.
395   // TODO(posciak): Try to have a superclass here if possible.
396   std::unique_ptr<V4L2H264Accelerator> h264_accelerator_;
397   std::unique_ptr<V4L2VP8Accelerator> vp8_accelerator_;
398   std::unique_ptr<V4L2VP9Accelerator> vp9_accelerator_;
399 
400   // Codec-specific software decoder in use.
401   std::unique_ptr<AcceleratedVideoDecoder> decoder_;
402 
403   // Surfaces queued to device to keep references to them while decoded.
404   using V4L2DecodeSurfaceByOutputId =
405       std::map<int, scoped_refptr<V4L2DecodeSurface>>;
406   V4L2DecodeSurfaceByOutputId surfaces_at_device_;
407 
408   // Surfaces sent to client to keep references to them while displayed.
409   using V4L2DecodeSurfaceByPictureBufferId =
410       std::map<int32_t, scoped_refptr<V4L2DecodeSurface>>;
411   V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;
412 
413   // Record for decoded pictures that can be sent to PictureReady.
414   struct PictureRecord {
415     PictureRecord(bool cleared, const Picture& picture);
416     ~PictureRecord();
417     bool cleared;  // Whether the texture is cleared and safe to render from.
418     Picture picture;  // The decoded picture.
419   };
420   // Pictures that are ready but not sent to PictureReady yet.
421   std::queue<PictureRecord> pending_picture_ready_;
422 
423   // The number of pictures that are sent to PictureReady and will be cleared.
424   int picture_clearing_count_;
425 
426   // The WeakPtrFactory for |weak_this_|.
427   base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_;
428 
429   DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator);
430 };
431 
432 }  // namespace media
433 
434 #endif  // V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
435