1 // Copyright 2015 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // Note: ported from Chromium commit head: 85fdf90 5 6 #ifndef V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_ 7 #define V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_ 8 9 #include <stddef.h> 10 #include <stdint.h> 11 12 #include <memory> 13 #include <queue> 14 #include <utility> 15 #include <vector> 16 17 #include "base/macros.h" 18 #include "base/memory/linked_ptr.h" 19 #include "base/memory/ref_counted.h" 20 #include "base/memory/weak_ptr.h" 21 #include "base/synchronization/waitable_event.h" 22 #include "base/threading/thread.h" 23 #include "h264_decoder.h" 24 #include "v4l2_device.h" 25 #include "video_decode_accelerator.h" 26 #include "videodev2.h" 27 #include "vp8_decoder.h" 28 #include "vp9_decoder.h" 29 30 namespace media { 31 32 // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice 33 // level codec API for decoding. The slice level API provides only a low-level 34 // decoding functionality and requires userspace to provide support for parsing 35 // the input stream and managing decoder state across frames. 36 class V4L2SliceVideoDecodeAccelerator 37 : public VideoDecodeAccelerator { 38 public: 39 class V4L2DecodeSurface; 40 41 V4L2SliceVideoDecodeAccelerator( 42 const scoped_refptr<V4L2Device>& device); 43 ~V4L2SliceVideoDecodeAccelerator() override; 44 45 // VideoDecodeAccelerator implementation. 46 bool Initialize(const Config& config, Client* client) override; 47 void Decode(const BitstreamBuffer& bitstream_buffer) override; 48 void AssignPictureBuffers(const std::vector<PictureBuffer>& buffers) override; 49 void ImportBufferForPicture( 50 int32_t picture_buffer_id, 51 VideoPixelFormat pixel_format, 52 const NativePixmapHandle& native_pixmap_handle) override; 53 void ReusePictureBuffer(int32_t picture_buffer_id) override; 54 void Flush() override; 55 void Reset() override; 56 void Destroy() override; 57 bool TryToSetupDecodeOnSeparateThread( 58 const base::WeakPtr<Client>& decode_client, 59 const scoped_refptr<base::SingleThreadTaskRunner>& decode_task_runner) 60 override; 61 62 static VideoDecodeAccelerator::SupportedProfiles GetSupportedProfiles(); 63 64 private: 65 class V4L2H264Accelerator; 66 class V4L2VP8Accelerator; 67 class V4L2VP9Accelerator; 68 69 // Record for input buffers. 70 struct InputRecord { 71 InputRecord(); 72 int32_t input_id; 73 void* address; 74 size_t length; 75 size_t bytes_used; 76 bool at_device; 77 }; 78 79 // Record for output buffers. 80 struct OutputRecord { 81 OutputRecord(); 82 OutputRecord(OutputRecord&&) = default; 83 bool at_device; 84 bool at_client; 85 int32_t picture_id; 86 std::vector<base::ScopedFD> dmabuf_fds; 87 bool cleared; 88 }; 89 90 // See http://crbug.com/255116. 91 // Input bitstream buffer size for up to 1080p streams. 92 const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024; 93 // Input bitstream buffer size for up to 4k streams. 94 const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p; 95 const size_t kNumInputBuffers = 16; 96 97 // Input format V4L2 fourccs this class supports. 98 static const uint32_t supported_input_fourccs_[]; 99 100 // 101 // Below methods are used by accelerator implementations. 102 // 103 // Append slice data in |data| of size |size| to pending hardware 104 // input buffer with |index|. This buffer will be submitted for decode 105 // on the next DecodeSurface(). Return true on success. 106 bool SubmitSlice(int index, const uint8_t* data, size_t size); 107 108 // Submit controls in |ext_ctrls| to hardware. Return true on success. 109 bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls); 110 111 // Gets current control values for controls in |ext_ctrls| from the driver. 112 // Return true on success. 113 bool GetExtControls(struct v4l2_ext_controls* ext_ctrls); 114 115 // Return true if the driver exposes V4L2 control |ctrl_id|, false otherwise. 116 bool IsCtrlExposed(uint32_t ctrl_id); 117 118 // Decode of |dec_surface| is ready to be submitted and all codec-specific 119 // settings are set in hardware. 120 void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface); 121 122 // |dec_surface| is ready to be outputted once decode is finished. 123 // This can be called before decode is actually done in hardware, and this 124 // method is responsible for maintaining the ordering, i.e. the surfaces will 125 // be outputted in the same order as SurfaceReady calls. To do so, the 126 // surfaces are put on decoder_display_queue_ and sent to output in that 127 // order once all preceding surfaces are sent. 128 void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface); 129 130 // 131 // Internal methods of this class. 132 // 133 // Recycle a V4L2 input buffer with |index| after dequeuing from device. 134 void ReuseInputBuffer(int index); 135 136 // Recycle V4L2 output buffer with |index|. Used as surface release callback. 137 void ReuseOutputBuffer(int index); 138 139 // Queue a |dec_surface| to device for decoding. 140 void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface); 141 142 // Dequeue any V4L2 buffers available and process. 143 void Dequeue(); 144 145 // V4L2 QBUF helpers. 146 bool EnqueueInputRecord(int index, uint32_t config_store); 147 bool EnqueueOutputRecord(int index); 148 149 // Set input and output formats in hardware. 150 bool SetupFormats(); 151 152 // Create input and output buffers. 153 bool CreateInputBuffers(); 154 bool CreateOutputBuffers(); 155 156 // Destroy input buffers. 157 void DestroyInputBuffers(); 158 159 // Destroy output buffers. If |dismiss| is true, also dismissing the 160 // associated PictureBuffers. 161 bool DestroyOutputs(bool dismiss); 162 163 // Used by DestroyOutputs. 164 bool DestroyOutputBuffers(); 165 166 // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer() 167 // and signal |done| after finishing. 168 void DismissPictures(const std::vector<int32_t>& picture_buffer_ids, 169 base::WaitableEvent* done); 170 171 // Task to finish initialization on decoder_thread_. 172 void InitializeTask(); 173 174 void NotifyError(Error error); 175 void DestroyTask(); 176 177 // Sets the state to kError and notifies client if needed. 178 void SetErrorState(Error error); 179 180 // Event handling. Events include flush, reset and resolution change and are 181 // processed while in kIdle state. 182 183 // Surface set change (resolution change) flow. 184 // If we have no surfaces allocated, start it immediately, otherwise mark 185 // ourselves as pending for surface set change. 186 void InitiateSurfaceSetChange(); 187 // If a surface set change is pending and we are ready, stop the device, 188 // destroy outputs, releasing resources and dismissing pictures as required, 189 // followed by starting the flow to allocate a new set for the current 190 // resolution/DPB size, as provided by decoder. 191 bool FinishSurfaceSetChange(); 192 193 // Flush flow when requested by client. 194 // When Flush() is called, it posts a FlushTask, which checks the input queue. 195 // If nothing is pending for decode on decoder_input_queue_, we call 196 // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef 197 // onto the decoder_input_queue_ to schedule a flush. When we reach it later 198 // on, we call InitiateFlush() to perform it at the correct time. 199 void FlushTask(); 200 // Tell the decoder to flush all frames, reset it and mark us as scheduled 201 // for flush, so that we can finish it once all pending decodes are finished. 202 void InitiateFlush(); 203 // To be called if decoder_flushing_ is true. If not all pending frames are 204 // decoded, return false, requesting the caller to try again later. 205 // Otherwise perform flush by sending all pending pictures to the client, 206 // notify it that flush is finished and return true, informing the caller 207 // that further progress can be made. 208 bool FinishFlush(); 209 210 // Reset flow when requested by client. 211 // Drop all inputs, reset the decoder and mark us as pending for reset. 212 void ResetTask(); 213 // To be called if decoder_resetting_ is true. If not all pending frames are 214 // decoded, return false, requesting the caller to try again later. 215 // Otherwise perform reset by dropping all pending outputs (client is not 216 // interested anymore), notifying it that reset is finished, and return true, 217 // informing the caller that further progress can be made. 218 bool FinishReset(); 219 220 // Called when a new event is pended. Transitions us into kIdle state (if not 221 // already in it), if possible. Also starts processing events. 222 void NewEventPending(); 223 224 // Called after all events are processed successfully (i.e. all Finish*() 225 // methods return true) to return to decoding state. 226 bool FinishEventProcessing(); 227 228 // Process pending events, if any. 229 void ProcessPendingEventsIfNeeded(); 230 231 // Allocate V4L2 buffers and assign them to |buffers| provided by the client 232 // via AssignPictureBuffers() on decoder thread. 233 void AssignPictureBuffersTask(const std::vector<PictureBuffer>& buffers); 234 235 // Use buffer backed by dmabuf file descriptors in |passed_dmabuf_fds| for the 236 // OutputRecord associated with |picture_buffer_id|, taking ownership of the 237 // file descriptors. 238 void ImportBufferForPictureTask( 239 int32_t picture_buffer_id, 240 // TODO(posciak): (https://crbug.com/561749) we should normally be able to 241 // pass the vector by itself via std::move, but it's not possible to do 242 // this if this method is used as a callback. 243 std::unique_ptr<std::vector<base::ScopedFD>> passed_dmabuf_fds); 244 245 // Performed on decoder_thread_ as a consequence of poll() on decoder_thread_ 246 // returning an event. 247 void ServiceDeviceTask(); 248 249 // Schedule poll if we have any buffers queued and the poll thread 250 // is not stopped (on surface set change). 251 void SchedulePollIfNeeded(); 252 253 // Attempt to start/stop device_poll_thread_. 254 bool StartDevicePoll(); 255 bool StopDevicePoll(bool keep_input_state); 256 257 // Ran on device_poll_thread_ to wait for device events. 258 void DevicePollTask(bool poll_device); 259 260 enum State { 261 // We are in this state until Initialize() returns successfully. 262 // We can't post errors to the client in this state yet. 263 kUninitialized, 264 // Initialize() returned successfully. 265 kInitialized, 266 // This state allows making progress decoding more input stream. 267 kDecoding, 268 // Transitional state when we are not decoding any more stream, but are 269 // performing flush, reset, resolution change or are destroying ourselves. 270 kIdle, 271 // Requested new PictureBuffers via ProvidePictureBuffers(), awaiting 272 // AssignPictureBuffers(). 273 kAwaitingPictureBuffers, 274 // Error state, set when sending NotifyError to client. 275 kError, 276 }; 277 278 // Buffer id for flush buffer, queued by FlushTask(). 279 const int kFlushBufferId = -2; 280 281 // Handler for Decode() on decoder_thread_. 282 void DecodeTask(const BitstreamBuffer& bitstream_buffer); 283 284 // Schedule a new DecodeBufferTask if we are decoding. 285 void ScheduleDecodeBufferTaskIfNeeded(); 286 287 // Main decoder loop. Keep decoding the current buffer in decoder_, asking 288 // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so, 289 // and handle other returns from it appropriately. 290 void DecodeBufferTask(); 291 292 // Check decoder_input_queue_ for any available buffers to decode and 293 // set the decoder_current_bitstream_buffer_ to the next buffer if one is 294 // available, taking it off the queue. Also set the current stream pointer 295 // in decoder_, and return true. 296 // Return false if no buffers are pending on decoder_input_queue_. 297 bool TrySetNewBistreamBuffer(); 298 299 // Auto-destruction reference for EGLSync (for message-passing). 300 void ReusePictureBufferTask(int32_t picture_buffer_id); 301 302 // Called to actually send |dec_surface| to the client, after it is decoded 303 // preserving the order in which it was scheduled via SurfaceReady(). 304 void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface); 305 306 // Goes over the |decoder_display_queue_| and sends all buffers from the 307 // front of the queue that are already decoded to the client, in order. 308 void TryOutputSurfaces(); 309 310 // Creates a new decode surface or returns nullptr if one is not available. 311 scoped_refptr<V4L2DecodeSurface> CreateSurface(); 312 313 // Send decoded pictures to PictureReady. 314 void SendPictureReady(); 315 316 // Callback that indicates a picture has been cleared. 317 void PictureCleared(); 318 319 size_t input_planes_count_; 320 size_t output_planes_count_; 321 322 // GPU Child thread task runner. 323 const scoped_refptr<base::SingleThreadTaskRunner> child_task_runner_; 324 325 // Task runner Decode() and PictureReady() run on. 326 scoped_refptr<base::SingleThreadTaskRunner> decode_task_runner_; 327 328 // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or 329 // device worker threads back to the child thread. 330 base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_; 331 332 // To expose client callbacks from VideoDecodeAccelerator. 333 // NOTE: all calls to these objects *MUST* be executed on 334 // child_task_runner_. 335 std::unique_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>> 336 client_ptr_factory_; 337 base::WeakPtr<VideoDecodeAccelerator::Client> client_; 338 // Callbacks to |decode_client_| must be executed on |decode_task_runner_|. 339 base::WeakPtr<Client> decode_client_; 340 341 // V4L2 device in use. 342 scoped_refptr<V4L2Device> device_; 343 344 // Thread to communicate with the device on. 345 base::Thread decoder_thread_; 346 scoped_refptr<base::SingleThreadTaskRunner> decoder_thread_task_runner_; 347 348 // Thread used to poll the device for events. 349 base::Thread device_poll_thread_; 350 351 // Input queue state. 352 bool input_streamon_; 353 // Number of input buffers enqueued to the device. 354 int input_buffer_queued_count_; 355 // Input buffers ready to use; LIFO since we don't care about ordering. 356 std::list<int> free_input_buffers_; 357 // Mapping of int index to an input buffer record. 358 std::vector<InputRecord> input_buffer_map_; 359 360 // Output queue state. 361 bool output_streamon_; 362 // Number of output buffers enqueued to the device. 363 int output_buffer_queued_count_; 364 // Output buffers ready to use. 365 std::list<int> free_output_buffers_; 366 // Mapping of int index to an output buffer record. 367 std::vector<OutputRecord> output_buffer_map_; 368 369 VideoCodecProfile video_profile_; 370 uint32_t input_format_fourcc_; 371 uint32_t output_format_fourcc_; 372 Size coded_size_; 373 374 struct BitstreamBufferRef; 375 // Input queue of stream buffers coming from the client. 376 std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_; 377 // BitstreamBuffer currently being processed. 378 std::unique_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_; 379 380 // Queue storing decode surfaces ready to be output as soon as they are 381 // decoded. The surfaces must be output in order they are queued. 382 std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_; 383 384 // Decoder state. 385 State state_; 386 387 Config::OutputMode output_mode_; 388 389 // If any of these are true, we are waiting for the device to finish decoding 390 // all previously-queued frames, so we can finish the flush/reset/surface 391 // change flows. These can stack. 392 bool decoder_flushing_; 393 bool decoder_resetting_; 394 bool surface_set_change_pending_; 395 396 // Hardware accelerators. 397 // TODO(posciak): Try to have a superclass here if possible. 398 std::unique_ptr<V4L2H264Accelerator> h264_accelerator_; 399 std::unique_ptr<V4L2VP8Accelerator> vp8_accelerator_; 400 std::unique_ptr<V4L2VP9Accelerator> vp9_accelerator_; 401 402 // Codec-specific software decoder in use. 403 std::unique_ptr<AcceleratedVideoDecoder> decoder_; 404 405 // Surfaces queued to device to keep references to them while decoded. 406 using V4L2DecodeSurfaceByOutputId = 407 std::map<int, scoped_refptr<V4L2DecodeSurface>>; 408 V4L2DecodeSurfaceByOutputId surfaces_at_device_; 409 410 // Surfaces sent to client to keep references to them while displayed. 411 using V4L2DecodeSurfaceByPictureBufferId = 412 std::map<int32_t, scoped_refptr<V4L2DecodeSurface>>; 413 V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_; 414 415 // Record for decoded pictures that can be sent to PictureReady. 416 struct PictureRecord { 417 PictureRecord(bool cleared, const Picture& picture); 418 ~PictureRecord(); 419 bool cleared; // Whether the texture is cleared and safe to render from. 420 Picture picture; // The decoded picture. 421 }; 422 423 // Pictures that are ready but not sent to PictureReady yet. 424 std::queue<PictureRecord> pending_picture_ready_; 425 426 // The number of pictures that are sent to PictureReady and will be cleared. 427 int picture_clearing_count_; 428 429 // The WeakPtrFactory for |weak_this_|. 430 base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_; 431 432 DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator); 433 }; 434 435 } // namespace media 436 437 #endif // V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_ 438