/*
 * Copyright 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <stdbool.h> /* bool/true/false used below */
#include <unistd.h>

#include "CUnit/Basic.h"

#include "util_math.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "decode_messages.h"
#include "frame.h"

#define IB_SIZE 4096
#define MAX_RESOURCES 16

#define DECODE_CMD_MSG_BUFFER 0x00000000
#define DECODE_CMD_DPB_BUFFER 0x00000001
#define DECODE_CMD_DECODING_TARGET_BUFFER 0x00000002
#define DECODE_CMD_FEEDBACK_BUFFER 0x00000003
#define DECODE_CMD_PROB_TBL_BUFFER 0x00000004
#define DECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005
#define DECODE_CMD_BITSTREAM_BUFFER 0x00000100
#define DECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204
#define DECODE_CMD_CONTEXT_BUFFER 0x00000206

#define DECODE_IB_PARAM_DECODE_BUFFER (0x00000001)

#define DECODE_CMDBUF_FLAGS_MSG_BUFFER (0x00000001)
#define DECODE_CMDBUF_FLAGS_DPB_BUFFER (0x00000002)
#define DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER (0x00000004)
#define DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER (0x00000008)
#define DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER (0x00000010)
#define DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER (0x00000200)
#define DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER (0x00000800)
#define DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER (0x00001000)
#define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER (0x00100000)

static bool vcn_dec_sw_ring = false;
static bool vcn_unified_ring = false;

#define H264_NAL_TYPE_NON_IDR_SLICE 1
#define H264_NAL_TYPE_DP_A_SLICE 2
#define H264_NAL_TYPE_DP_B_SLICE 3
#define H264_NAL_TYPE_DP_C_SLICE 0x4
#define H264_NAL_TYPE_IDR_SLICE 0x5
#define H264_NAL_TYPE_SEI 0x6
#define H264_NAL_TYPE_SEQ_PARAM 0x7
#define H264_NAL_TYPE_PIC_PARAM 0x8
#define H264_NAL_TYPE_ACCESS_UNIT 0x9
#define H264_NAL_TYPE_END_OF_SEQ 0xa
#define H264_NAL_TYPE_END_OF_STREAM 0xb
#define H264_NAL_TYPE_FILLER_DATA 0xc
#define H264_NAL_TYPE_SEQ_EXTENSION 0xd

#define H264_START_CODE 0x000001

struct amdgpu_vcn_bo {
	amdgpu_bo_handle handle;
	amdgpu_va_handle va_handle;
	uint64_t addr;
	uint64_t size;
	uint8_t *ptr;
};

typedef struct rvcn_decode_buffer_s {
	unsigned int valid_buf_flag;
	unsigned int msg_buffer_address_hi;
	unsigned int msg_buffer_address_lo;
	unsigned int dpb_buffer_address_hi;
	unsigned int dpb_buffer_address_lo;
	unsigned int target_buffer_address_hi;
	unsigned int target_buffer_address_lo;
	unsigned int session_contex_buffer_address_hi;
	unsigned int session_contex_buffer_address_lo;
	unsigned int bitstream_buffer_address_hi;
	unsigned int bitstream_buffer_address_lo;
	unsigned int context_buffer_address_hi;
	unsigned int context_buffer_address_lo;
	unsigned int feedback_buffer_address_hi;
	unsigned int feedback_buffer_address_lo;
	unsigned int luma_hist_buffer_address_hi;
	unsigned int luma_hist_buffer_address_lo;
	unsigned int prob_tbl_buffer_address_hi;
	unsigned int prob_tbl_buffer_address_lo;
	unsigned int sclr_coeff_buffer_address_hi;
	unsigned int sclr_coeff_buffer_address_lo;
	unsigned int it_sclr_table_buffer_address_hi;
	unsigned int it_sclr_table_buffer_address_lo;
	unsigned int sclr_target_buffer_address_hi;
	unsigned int sclr_target_buffer_address_lo;
	unsigned int cenc_size_info_buffer_address_hi;
	unsigned int cenc_size_info_buffer_address_lo;
	unsigned int mpeg2_pic_param_buffer_address_hi;
	unsigned int mpeg2_pic_param_buffer_address_lo;
	unsigned int mpeg2_mb_control_buffer_address_hi;
	unsigned int mpeg2_mb_control_buffer_address_lo;
	unsigned int mpeg2_idct_coeff_buffer_address_hi;
	unsigned int mpeg2_idct_coeff_buffer_address_lo;
} rvcn_decode_buffer_t;

typedef struct rvcn_decode_ib_package_s {
	unsigned int package_size;
	unsigned int package_type;
} rvcn_decode_ib_package_t;

struct amdgpu_vcn_reg {
	uint32_t data0;
	uint32_t data1;
	uint32_t cmd;
	uint32_t nop;
	uint32_t cntl;
};

typedef struct BufferInfo_t {
	uint32_t numOfBitsInBuffer;
	const uint8_t *decBuffer;
	uint8_t decData;
	uint32_t decBufferSize;
	const uint8_t *end;
} bufferInfo;

typedef struct h264_decode_t {
	uint8_t profile;
	uint8_t level_idc;
	uint8_t nal_ref_idc;
	uint8_t nal_unit_type;
	uint32_t pic_width, pic_height;
	uint32_t slice_type;
} h264_decode;

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;
static uint32_t chip_rev;
static uint32_t chip_id;
static uint32_t asic_id;
static struct amdgpu_vcn_bo enc_buf;
static struct amdgpu_vcn_bo cpb_buf;
static uint32_t enc_task_id;

static amdgpu_context_handle context_handle;
static amdgpu_bo_handle ib_handle;
static amdgpu_va_handle ib_va_handle;
static uint64_t ib_mc_address;
static uint32_t *ib_cpu;
static uint32_t *ib_checksum;
static uint32_t *ib_size_in_dw;

static rvcn_decode_buffer_t *decode_buffer;
struct amdgpu_vcn_bo session_ctx_buf;

static amdgpu_bo_handle resources[MAX_RESOURCES];
static unsigned num_resources;

static uint8_t vcn_reg_index;
static struct amdgpu_vcn_reg reg[] = {
	{0x81c4, 0x81c5, 0x81c3, 0x81ff, 0x81c6},
	{0x504, 0x505, 0x503, 0x53f, 0x506},
	{0x10, 0x11, 0xf, 0x29, 0x26d},
};

uint32_t gWidth, gHeight, gSliceType;
static uint32_t vcn_ip_version_major;
static uint32_t vcn_ip_version_minor;
static void amdgpu_cs_vcn_dec_create(void);
static void amdgpu_cs_vcn_dec_decode(void);
static void amdgpu_cs_vcn_dec_destroy(void);

static void amdgpu_cs_vcn_enc_create(void);
static void amdgpu_cs_vcn_enc_encode(void);
static void amdgpu_cs_vcn_enc_destroy(void);

static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc);
static void amdgpu_cs_sq_ib_tail(uint32_t *end);
static void h264_check_0s(bufferInfo *bufInfo, int count);
static int32_t h264_se(bufferInfo *bufInfo);
static inline uint32_t bs_read_u1(bufferInfo *bufinfo);
static inline int bs_eof(bufferInfo *bufinfo);
static inline uint32_t bs_read_u(bufferInfo *bufinfo, int n);
static inline uint32_t bs_read_ue(bufferInfo *bufinfo);
static uint32_t remove_03(uint8_t *bptr, uint32_t len);
static void scaling_list(uint32_t ix, uint32_t sizeOfScalingList, bufferInfo *bufInfo);
static void h264_parse_sequence_parameter_set(h264_decode *dec, bufferInfo *bufInfo);
static void h264_slice_header(h264_decode *dec, bufferInfo *bufInfo);
static uint8_t h264_parse_nal(h264_decode *dec, bufferInfo *bufInfo);
static uint32_t h264_find_next_start_code(uint8_t *pBuf, uint32_t bufLen);
static int verify_checksum(uint8_t *buffer, uint32_t buffer_size);

CU_TestInfo vcn_tests[] = {
	{ "VCN DEC create", amdgpu_cs_vcn_dec_create },
	{ "VCN DEC decode", amdgpu_cs_vcn_dec_decode },
	{ "VCN DEC destroy", amdgpu_cs_vcn_dec_destroy },

	{ "VCN ENC create", amdgpu_cs_vcn_enc_create },
	{ "VCN ENC encode", amdgpu_cs_vcn_enc_encode },
	{ "VCN ENC destroy", amdgpu_cs_vcn_enc_destroy },
	CU_TEST_INFO_NULL,
};

CU_BOOL suite_vcn_tests_enable(void)
{
	struct drm_amdgpu_info_hw_ip info;
	bool enc_ring, dec_ring;
	int r;

	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle))
		return CU_FALSE;

	family_id = device_handle->info.family_id;
	asic_id = device_handle->info.asic_id;
	chip_rev = device_handle->info.chip_rev;
	chip_id = device_handle->info.chip_external_rev;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info);
	if (!r) {
		vcn_ip_version_major = info.hw_ip_version_major;
		vcn_ip_version_minor = info.hw_ip_version_minor;
		enc_ring = !!info.available_rings;
		/* VCN 4.0 and later reuse the encode queue as a unified queue */
		if (vcn_ip_version_major >= 4) {
			vcn_unified_ring = true;
			vcn_dec_sw_ring = true;
			dec_ring = enc_ring;
		} else {
			r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info);
			dec_ring = !!info.available_rings;
		}
	}

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	if (r) {
		printf("\n\nASIC query hw info failed\n");
		return CU_FALSE;
	}

	if (!(dec_ring || enc_ring) ||
	    (family_id < AMDGPU_FAMILY_RV &&
	     (family_id == AMDGPU_FAMILY_AI &&
	      (chip_id - chip_rev) < 0x32))) { /* Arcturus */
		printf("\n\nThe ASIC does not support VCN, suite disabled\n");
		return CU_FALSE;
	}

	if (!dec_ring) {
		amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE);
		amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE);
		amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE);
	}

	if (family_id == AMDGPU_FAMILY_AI || !enc_ring) {
		amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE);
		amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE);
		amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE);
	}

	if (vcn_ip_version_major == 1)
		vcn_reg_index = 0;
	else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0)
		vcn_reg_index = 1;
	else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) ||
		 vcn_ip_version_major == 3)
		vcn_reg_index = 2;

	return CU_TRUE;
}

int suite_vcn_tests_init(void)
{
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = device_handle->info.family_id;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	if (r)
		return CUE_SINIT_FAILED;

	r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_handle, (void**)&ib_cpu,
				    &ib_mc_address, &ib_va_handle);
	if (r)
		return CUE_SINIT_FAILED;

	return CUE_SUCCESS;
}

int suite_vcn_tests_clean(void)
{
	int r;

	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
				     ib_mc_address, IB_SIZE);
	if (r)
		return CUE_SCLEAN_FAILED;

	r = amdgpu_cs_ctx_free(context_handle);
	if (r)
		return CUE_SCLEAN_FAILED;

	r = amdgpu_device_deinitialize(device_handle);
	if (r)
		return CUE_SCLEAN_FAILED;

	return CUE_SUCCESS;
}

static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc)
{
	/* signature */
	*(base + (*offset)++) = 0x00000010;
	*(base + (*offset)++) = 0x30000002;
	ib_checksum = base + (*offset)++;
	ib_size_in_dw = base + (*offset)++;

	/* engine info */
	*(base + (*offset)++) = 0x00000010;
	*(base + (*offset)++) = 0x30000001;
	*(base + (*offset)++) = enc ? 2 : 3;
	*(base + (*offset)++) = 0x00000000;
}

static void amdgpu_cs_sq_ib_tail(uint32_t *end)
{
	uint32_t size_in_dw;
	uint32_t checksum = 0;

	/* if the pointers are invalid, no need to process */
	if (ib_checksum == NULL || ib_size_in_dw == NULL)
		return;

	size_in_dw = end - ib_size_in_dw - 1;
	*ib_size_in_dw = size_in_dw;
	*(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);

	for (int i = 0; i < size_in_dw; i++)
		checksum += *(ib_checksum + 2 + i);

	*ib_checksum = checksum;

	ib_checksum = NULL;
	ib_size_in_dw = NULL;
}

static int submit(unsigned ndw, unsigned ip)
{
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	int r;

	ib_info.ib_mc_address = ib_mc_address;
	ib_info.size = ndw;

	ibs_request.ip_type = ip;

	r = amdgpu_bo_list_create(device_handle, num_resources, resources,
				  NULL, &ibs_request.resources);
	if (r)
		return r;

	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	if (r)
		return r;

	r = amdgpu_bo_list_destroy(ibs_request.resources);
	if (r)
		return r;

	fence_status.context = context_handle;
	fence_status.ip_type = ip;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (r)
		return r;

	return 0;
}
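
/*
 * Every test below follows the same submission pattern: build the command
 * stream directly in ib_cpu, list every BO the engine will touch in
 * resources[]/num_resources, then call submit() with the dword count and the
 * target IP block.  A minimal sketch (msg_buf here is a hypothetical buffer
 * already set up with alloc_resource()):
 *
 *	num_resources = 0;
 *	resources[num_resources++] = msg_buf.handle;
 *	resources[num_resources++] = ib_handle;
 *	len = 0;
 *	ib_cpu[len++] = ...;			command stream dwords
 *	r = submit(len, AMDGPU_HW_IP_VCN_DEC);
 *
 * submit() waits for the fence with AMDGPU_TIMEOUT_INFINITE, so a test only
 * returns once the engine has finished (or the submission failed).
 */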

static void alloc_resource(struct amdgpu_vcn_bo *vcn_bo,
			   unsigned size, unsigned domain)
{
	struct amdgpu_bo_alloc_request req = {0};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;
	uint64_t va = 0;
	int r;

	req.alloc_size = ALIGN(size, 4096);
	req.preferred_heap = domain;
	r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  req.alloc_size, 1, 0, &va,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0,
			    AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);
	vcn_bo->addr = va;
	vcn_bo->handle = buf_handle;
	vcn_bo->size = req.alloc_size;
	vcn_bo->va_handle = va_handle;
	r = amdgpu_bo_cpu_map(vcn_bo->handle, (void **)&vcn_bo->ptr);
	CU_ASSERT_EQUAL(r, 0);
	memset(vcn_bo->ptr, 0, size);
	r = amdgpu_bo_cpu_unmap(vcn_bo->handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void free_resource(struct amdgpu_vcn_bo *vcn_bo)
{
	int r;

	r = amdgpu_bo_va_op(vcn_bo->handle, 0, vcn_bo->size,
			    vcn_bo->addr, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_free(vcn_bo->va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_free(vcn_bo->handle);
	CU_ASSERT_EQUAL(r, 0);
	memset(vcn_bo, 0, sizeof(*vcn_bo));
}
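
/*
 * vcn_dec_cmd() emits one decoder buffer address in whichever form the ring
 * expects.  On the legacy decode ring it writes the address and command
 * through the data0/data1/cmd registers of reg[vcn_reg_index]; on the
 * software ring (VCN 4.x unified queue) it instead fills the matching hi/lo
 * fields and valid_buf_flag bit of the rvcn_decode_buffer_t that is embedded
 * in the IB on the first call.
 */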

static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx)
{
	if (vcn_dec_sw_ring == false) {
		ib_cpu[(*idx)++] = reg[vcn_reg_index].data0;
		ib_cpu[(*idx)++] = addr;
		ib_cpu[(*idx)++] = reg[vcn_reg_index].data1;
		ib_cpu[(*idx)++] = addr >> 32;
		ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd;
		ib_cpu[(*idx)++] = cmd << 1;
		return;
	}

	/* Support decode software ring message */
	if (!(*idx)) {
		rvcn_decode_ib_package_t *ib_header;

		if (vcn_unified_ring)
			amdgpu_cs_sq_head(ib_cpu, idx, false);

		ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx];
		ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
			sizeof(struct rvcn_decode_ib_package_s);
		(*idx)++;
		ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER);
		(*idx)++;

		decode_buffer = (rvcn_decode_buffer_t *)&(ib_cpu[*idx]);
		*idx += sizeof(struct rvcn_decode_buffer_s) / 4;
		memset(decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
	}

	switch (cmd) {
	case DECODE_CMD_MSG_BUFFER:
		decode_buffer->valid_buf_flag |= DECODE_CMDBUF_FLAGS_MSG_BUFFER;
		decode_buffer->msg_buffer_address_hi = (addr >> 32);
		decode_buffer->msg_buffer_address_lo = (addr);
		break;
	case DECODE_CMD_DPB_BUFFER:
		decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DPB_BUFFER);
		decode_buffer->dpb_buffer_address_hi = (addr >> 32);
		decode_buffer->dpb_buffer_address_lo = (addr);
		break;
	case DECODE_CMD_DECODING_TARGET_BUFFER:
		decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
		decode_buffer->target_buffer_address_hi = (addr >> 32);
		decode_buffer->target_buffer_address_lo = (addr);
		break;
	case DECODE_CMD_FEEDBACK_BUFFER:
		decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
		decode_buffer->feedback_buffer_address_hi = (addr >> 32);
		decode_buffer->feedback_buffer_address_lo = (addr);
		break;
	case DECODE_CMD_PROB_TBL_BUFFER:
		decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
		decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
		decode_buffer->prob_tbl_buffer_address_lo = (addr);
		break;
	case DECODE_CMD_SESSION_CONTEXT_BUFFER:
		decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
		decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
		decode_buffer->session_contex_buffer_address_lo = (addr);
		break;
	case DECODE_CMD_BITSTREAM_BUFFER:
		decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
		decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
		decode_buffer->bitstream_buffer_address_lo = (addr);
		break;
	case DECODE_CMD_IT_SCALING_TABLE_BUFFER:
		decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
		decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
		decode_buffer->it_sclr_table_buffer_address_lo = (addr);
		break;
	case DECODE_CMD_CONTEXT_BUFFER:
		decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
		decode_buffer->context_buffer_address_hi = (addr >> 32);
		decode_buffer->context_buffer_address_lo = (addr);
		break;
	default:
		printf("Unsupported decode command 0x%x!\n", cmd);
	}
}

static void amdgpu_cs_vcn_dec_create(void)
{
	struct amdgpu_vcn_bo msg_buf;
	unsigned ip;
	int len, r;

	num_resources = 0;
	alloc_resource(&msg_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
	alloc_resource(&session_ctx_buf, 32 * 4096, AMDGPU_GEM_DOMAIN_VRAM);
	resources[num_resources++] = msg_buf.handle;
	resources[num_resources++] = session_ctx_buf.handle;
	resources[num_resources++] = ib_handle;

	r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
	CU_ASSERT_EQUAL(r, 0);

	memset(msg_buf.ptr, 0, 4096);
	memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg));

	len = 0;

	vcn_dec_cmd(session_ctx_buf.addr, 5, &len);
	if (vcn_dec_sw_ring == true) {
		vcn_dec_cmd(msg_buf.addr, 0, &len);
	} else {
		ib_cpu[len++] = reg[vcn_reg_index].data0;
		ib_cpu[len++] = msg_buf.addr;
		ib_cpu[len++] = reg[vcn_reg_index].data1;
		ib_cpu[len++] = msg_buf.addr >> 32;
		ib_cpu[len++] = reg[vcn_reg_index].cmd;
		ib_cpu[len++] = 0;
		for (; len % 16; ) {
			ib_cpu[len++] = reg[vcn_reg_index].nop;
			ib_cpu[len++] = 0;
		}
	}

	if (vcn_unified_ring) {
		amdgpu_cs_sq_ib_tail(ib_cpu + len);
		ip = AMDGPU_HW_IP_VCN_ENC;
	} else
		ip = AMDGPU_HW_IP_VCN_DEC;

	r = submit(len, ip);
	CU_ASSERT_EQUAL(r, 0);

	free_resource(&msg_buf);
}
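
/*
 * Decode one frame from the canned H.264 bitstream in decode_messages.h.
 * A single GTT allocation is carved up as message (4K) | feedback (4K) |
 * IT scaling table (4K) | bitstream | DPB | decoding target, and the same
 * offsets are handed to the engine through vcn_dec_cmd().  The byte sum of
 * the decoded target is then compared against SUM_DECODE.
 */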

static void amdgpu_cs_vcn_dec_decode(void)
{
	const unsigned dpb_size = 15923584, dt_size = 737280;
	uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum;
	struct amdgpu_vcn_bo dec_buf;
	int size, len, i, r;
	unsigned ip;
	uint8_t *dec;

	size = 4*1024; /* msg */
	size += 4*1024; /* fb */
	size += 4096; /* it_scaling_table */
	size += ALIGN(sizeof(uvd_bitstream), 4*1024);
	size += ALIGN(dpb_size, 4*1024);
	size += ALIGN(dt_size, 4*1024);

	num_resources = 0;
	alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_GTT);
	resources[num_resources++] = dec_buf.handle;
	resources[num_resources++] = ib_handle;

	r = amdgpu_bo_cpu_map(dec_buf.handle, (void **)&dec_buf.ptr);
	CU_ASSERT_EQUAL(r, 0);
	dec = dec_buf.ptr;

	memset(dec_buf.ptr, 0, size);
	memcpy(dec_buf.ptr, vcn_dec_decode_msg, sizeof(vcn_dec_decode_msg));
	memcpy(dec_buf.ptr + sizeof(vcn_dec_decode_msg),
	       avc_decode_msg, sizeof(avc_decode_msg));

	dec += 4*1024;
	memcpy(dec, feedback_msg, sizeof(feedback_msg));
	dec += 4*1024;
	memcpy(dec, uvd_it_scaling_table, sizeof(uvd_it_scaling_table));

	dec += 4*1024;
	memcpy(dec, uvd_bitstream, sizeof(uvd_bitstream));

	dec += ALIGN(sizeof(uvd_bitstream), 4*1024);

	dec += ALIGN(dpb_size, 4*1024);

	msg_addr = dec_buf.addr;
	fb_addr = msg_addr + 4*1024;
	it_addr = fb_addr + 4*1024;
	bs_addr = it_addr + 4*1024;
	dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024);
	ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
	dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024);

	len = 0;
	vcn_dec_cmd(session_ctx_buf.addr, 0x5, &len);
	vcn_dec_cmd(msg_addr, 0x0, &len);
	vcn_dec_cmd(dpb_addr, 0x1, &len);
	vcn_dec_cmd(dt_addr, 0x2, &len);
	vcn_dec_cmd(fb_addr, 0x3, &len);
	vcn_dec_cmd(bs_addr, 0x100, &len);
	vcn_dec_cmd(it_addr, 0x204, &len);
	vcn_dec_cmd(ctx_addr, 0x206, &len);

	if (vcn_dec_sw_ring == false) {
		ib_cpu[len++] = reg[vcn_reg_index].cntl;
		ib_cpu[len++] = 0x1;
		for (; len % 16; ) {
			ib_cpu[len++] = reg[vcn_reg_index].nop;
			ib_cpu[len++] = 0;
		}
	}

	if (vcn_unified_ring) {
		amdgpu_cs_sq_ib_tail(ib_cpu + len);
		ip = AMDGPU_HW_IP_VCN_ENC;
	} else
		ip = AMDGPU_HW_IP_VCN_DEC;

	r = submit(len, ip);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0, sum = 0; i < dt_size; ++i)
		sum += dec[i];

	CU_ASSERT_EQUAL(sum, SUM_DECODE);

	free_resource(&dec_buf);
}

static void amdgpu_cs_vcn_dec_destroy(void)
{
	struct amdgpu_vcn_bo msg_buf;
	unsigned ip;
	int len, r;

	num_resources = 0;
	alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT);
	resources[num_resources++] = msg_buf.handle;
	resources[num_resources++] = ib_handle;

	r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
	CU_ASSERT_EQUAL(r, 0);

	memset(msg_buf.ptr, 0, 1024);
	memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg));

	len = 0;
	vcn_dec_cmd(session_ctx_buf.addr, 5, &len);
	if (vcn_dec_sw_ring == true) {
		vcn_dec_cmd(msg_buf.addr, 0, &len);
	} else {
		ib_cpu[len++] = reg[vcn_reg_index].data0;
		ib_cpu[len++] = msg_buf.addr;
		ib_cpu[len++] = reg[vcn_reg_index].data1;
		ib_cpu[len++] = msg_buf.addr >> 32;
		ib_cpu[len++] = reg[vcn_reg_index].cmd;
		ib_cpu[len++] = 0;
		for (; len % 16; ) {
			ib_cpu[len++] = reg[vcn_reg_index].nop;
			ib_cpu[len++] = 0;
		}
	}

	if (vcn_unified_ring) {
		amdgpu_cs_sq_ib_tail(ib_cpu + len);
		ip = AMDGPU_HW_IP_VCN_ENC;
	} else
		ip = AMDGPU_HW_IP_VCN_DEC;

	r = submit(len, ip);
	CU_ASSERT_EQUAL(r, 0);

	free_resource(&msg_buf);
	free_resource(&session_ctx_buf);
}
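
/*
 * The encode tests build RENCODE-style IBs: a sequence of packages, each
 * starting with its size in bytes followed by an opcode dword and the
 * payload.  The size is not known until the payload has been written, so
 * every block below records the package start (st_offset), reserves a dword
 * for the size (st_size) and patches it afterwards:
 *
 *	st_offset = len;
 *	st_size = &ib_cpu[len++];	size placeholder
 *	ib_cpu[len++] = opcode;
 *	... payload dwords ...
 *	*st_size = (len - st_offset) * 4;
 *
 * The task-info package additionally wraps everything that follows it,
 * which is why p_task_size is patched last.
 */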

static void amdgpu_cs_vcn_enc_create(void)
{
	int len, r;
	uint32_t *p_task_size = NULL;
	uint32_t task_offset = 0, st_offset;
	uint32_t *st_size = NULL;
	unsigned width = 160, height = 128, buf_size;
	uint32_t fw_maj = 1, fw_min = 9;

	if (vcn_ip_version_major == 2) {
		fw_maj = 1;
		fw_min = 1;
	} else if (vcn_ip_version_major == 3) {
		fw_maj = 1;
		fw_min = 0;
	}

	gWidth = width;
	gHeight = height;
	buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;
	enc_task_id = 1;

	num_resources = 0;
	alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT);
	alloc_resource(&cpb_buf, buf_size * 2, AMDGPU_GEM_DOMAIN_GTT);
	resources[num_resources++] = enc_buf.handle;
	resources[num_resources++] = cpb_buf.handle;
	resources[num_resources++] = ib_handle;

	r = amdgpu_bo_cpu_map(enc_buf.handle, (void**)&enc_buf.ptr);
	memset(enc_buf.ptr, 0, 128 * 1024);
	r = amdgpu_bo_cpu_unmap(enc_buf.handle);

	r = amdgpu_bo_cpu_map(cpb_buf.handle, (void**)&cpb_buf.ptr);
	memset(cpb_buf.ptr, 0, buf_size * 2);
	r = amdgpu_bo_cpu_unmap(cpb_buf.handle);

	len = 0;

	if (vcn_unified_ring)
		amdgpu_cs_sq_head(ib_cpu, &len, true);

	/* session info */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */
	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
	ib_cpu[len++] = enc_buf.addr >> 32;
	ib_cpu[len++] = enc_buf.addr;
	ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE */
	*st_size = (len - st_offset) * 4;

	/* task info */
	task_offset = len;
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */
	p_task_size = &ib_cpu[len++];
	ib_cpu[len++] = enc_task_id++; /* task_id */
	ib_cpu[len++] = 0; /* feedback */
	*st_size = (len - st_offset) * 4;

	/* op init */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x01000001; /* RENCODE_IB_OP_INITIALIZE */
	*st_size = (len - st_offset) * 4;

	/* session_init */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000003; /* RENCODE_IB_PARAM_SESSION_INIT */
	ib_cpu[len++] = 1; /* RENCODE_ENCODE_STANDARD_H264 */
	ib_cpu[len++] = width;
	ib_cpu[len++] = height;
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 0; /* pre encode mode */
	ib_cpu[len++] = 0; /* chroma enabled : false */
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 0;
	*st_size = (len - st_offset) * 4;

	/* slice control */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00200001; /* RENCODE_H264_IB_PARAM_SLICE_CONTROL */
	ib_cpu[len++] = 0; /* RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS */
	ib_cpu[len++] = ALIGN(width, 16) / 16 * ALIGN(height, 16) / 16;
	*st_size = (len - st_offset) * 4;

	/* enc spec misc */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00200002; /* RENCODE_H264_IB_PARAM_SPEC_MISC */
	ib_cpu[len++] = 0; /* constrained intra pred flag */
	ib_cpu[len++] = 0; /* cabac enable */
	ib_cpu[len++] = 0; /* cabac init idc */
	ib_cpu[len++] = 1; /* half pel enabled */
	ib_cpu[len++] = 1; /* quarter pel enabled */
	ib_cpu[len++] = 100; /* profile_idc 100 (High), matching the SPS sent at encode time */
	ib_cpu[len++] = 11; /* level */
	if (vcn_ip_version_major >= 3) {
		ib_cpu[len++] = 0; /* b_picture_enabled */
		ib_cpu[len++] = 0; /* weighted_bipred_idc */
	}
	*st_size = (len - st_offset) * 4;

	/* deblocking filter */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00200004; /* RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER */
	ib_cpu[len++] = 0; /* disable deblocking filter idc */
	ib_cpu[len++] = 0; /* alpha c0 offset */
	ib_cpu[len++] = 0; /* tc offset */
	ib_cpu[len++] = 0; /* cb offset */
	ib_cpu[len++] = 0; /* cr offset */
	*st_size = (len - st_offset) * 4;

	/* layer control */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000004; /* RENCODE_IB_PARAM_LAYER_CONTROL */
	ib_cpu[len++] = 1; /* max temporal layer */
	ib_cpu[len++] = 1; /* no of temporal layer */
	*st_size = (len - st_offset) * 4;

	/* rc_session init */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000006; /* RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT */
	ib_cpu[len++] = 0; /* rate control */
	ib_cpu[len++] = 48; /* vbv buffer level */
	*st_size = (len - st_offset) * 4;

	/* quality params */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000009; /* RENCODE_IB_PARAM_QUALITY_PARAMS */
	ib_cpu[len++] = 0; /* vbaq mode */
	ib_cpu[len++] = 0; /* scene change sensitivity */
	ib_cpu[len++] = 0; /* scene change min idr interval */
	ib_cpu[len++] = 0;
	if (vcn_ip_version_major >= 3)
		ib_cpu[len++] = 0;
	*st_size = (len - st_offset) * 4;

	/* layer select */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */
	ib_cpu[len++] = 0; /* temporal layer */
	*st_size = (len - st_offset) * 4;

	/* rc layer init */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000007; /* RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT */
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 25;
	ib_cpu[len++] = 1;
	ib_cpu[len++] = 0x01312d00;
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 0;
	*st_size = (len - st_offset) * 4;

	/* layer select */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */
	ib_cpu[len++] = 0; /* temporal layer */
	*st_size = (len - st_offset) * 4;

	/* rc per pic */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000008; /* RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE */
	ib_cpu[len++] = 20;
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 51;
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 1;
	ib_cpu[len++] = 0;
	ib_cpu[len++] = 1;
	ib_cpu[len++] = 0;
	*st_size = (len - st_offset) * 4;

	/* op init rc */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x01000004; /* RENCODE_IB_OP_INIT_RC */
	*st_size = (len - st_offset) * 4;

	/* op init rc vbv */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x01000005; /* RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL */
	*st_size = (len - st_offset) * 4;

	*p_task_size = (len - task_offset) * 4;

	if (vcn_unified_ring)
		amdgpu_cs_sq_ib_tail(ib_cpu + len);

	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
	CU_ASSERT_EQUAL(r, 0);
}

static int32_t h264_se (bufferInfo * bufInfo)
{
	uint32_t ret;

	ret = bs_read_ue (bufInfo);
	if ((ret & 0x1) == 0) {
		ret >>= 1;
		int32_t temp = 0 - ret;
		return temp;
	}

	return (ret + 1) >> 1;
}

static void h264_check_0s (bufferInfo * bufInfo, int count)
{
	uint32_t val;

	val = bs_read_u (bufInfo, count);
	if (val != 0) {
		printf ("field error - %d bits should be 0, got 0x%x\n", count, val);
	}
}

static inline int bs_eof(bufferInfo * bufinfo)
{
	if (bufinfo->decBuffer >= bufinfo->end)
		return 1;
	else
		return 0;
}
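
/*
 * Minimal big-endian bit reader used by the H.264 parser below.
 * bs_read_u1() returns the next bit, bs_read_u() the next n bits, and
 * bs_read_ue() an unsigned Exp-Golomb value: count leading zero bits, read
 * that many more bits, and add 2^leading_zeros - 1.  For example the bit
 * string 00101 decodes as ue(v) = 0b01 + (1 << 2) - 1 = 4.
 */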

static inline uint32_t bs_read_u1(bufferInfo *bufinfo)
{
	uint32_t r = 0;
	uint32_t temp = 0;

	bufinfo->numOfBitsInBuffer--;
	if (!bs_eof(bufinfo)) {
		temp = (((bufinfo->decData)) >> bufinfo->numOfBitsInBuffer);
		r = temp & 0x01;
	}

	if (bufinfo->numOfBitsInBuffer == 0) {
		bufinfo->decBuffer++;
		bufinfo->decData = *bufinfo->decBuffer;
		bufinfo->numOfBitsInBuffer = 8;
	}

	return r;
}

static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n)
{
	uint32_t r = 0;
	int i;

	for (i = 0; i < n; i++) {
		r |= ( bs_read_u1(bufinfo) << ( n - i - 1 ) );
	}

	return r;
}

static inline uint32_t bs_read_ue(bufferInfo* bufinfo)
{
	int32_t r = 0;
	int i = 0;

	while( (bs_read_u1(bufinfo) == 0) && (i < 32) && (!bs_eof(bufinfo))) {
		i++;
	}
	r = bs_read_u(bufinfo, i);
	r += (1 << i) - 1;
	return r;
}

static uint32_t remove_03 (uint8_t * bptr, uint32_t len)
{
	uint32_t nal_len = 0;
	while (nal_len + 2 < len) {
		if (bptr[0] == 0 && bptr[1] == 0 && bptr[2] == 3) {
			bptr += 2;
			nal_len += 2;
			len--;
			memmove (bptr, bptr + 1, len - nal_len);
		} else {
			bptr++;
			nal_len++;
		}
	}
	return len;
}

static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo * bufInfo)
{
	uint32_t lastScale = 8, nextScale = 8;
	uint32_t jx;
	int deltaScale;

	for (jx = 0; jx < sizeOfScalingList; jx++) {
		if (nextScale != 0) {
			deltaScale = h264_se (bufInfo);
			nextScale = (lastScale + deltaScale + 256) % 256;
		}
		if (nextScale != 0)
			lastScale = nextScale;
	}
}
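
/*
 * Parse just enough of the sequence parameter set to recover profile_idc,
 * level_idc and the coded picture size.  pic_width/pic_height come out in
 * luma samples (macroblock counts multiplied by 16); verify_checksum()
 * compares them against the gWidth/gHeight the encode test asked for.
 */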

static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo * bufInfo)
{
	uint32_t temp;

	dec->profile = bs_read_u (bufInfo, 8);
	bs_read_u (bufInfo, 1); /* constraint_set0_flag */
	bs_read_u (bufInfo, 1); /* constraint_set1_flag */
	bs_read_u (bufInfo, 1); /* constraint_set2_flag */
	bs_read_u (bufInfo, 1); /* constraint_set3_flag */
	bs_read_u (bufInfo, 1); /* constraint_set4_flag */
	bs_read_u (bufInfo, 1); /* constraint_set5_flag */

	h264_check_0s (bufInfo, 2);
	dec->level_idc = bs_read_u (bufInfo, 8);
	bs_read_ue (bufInfo); /* SPS id */

	if (dec->profile == 100 || dec->profile == 110 ||
	    dec->profile == 122 || dec->profile == 144) {
		uint32_t chroma_format_idc = bs_read_ue (bufInfo);
		if (chroma_format_idc == 3) {
			bs_read_u (bufInfo, 1); /* residual_colour_transform_flag */
		}
		bs_read_ue (bufInfo); /* bit_depth_luma_minus8 */
		bs_read_ue (bufInfo); /* bit_depth_chroma_minus8 */
		bs_read_u (bufInfo, 1); /* qpprime_y_zero_transform_bypass_flag */
		uint32_t seq_scaling_matrix_present_flag = bs_read_u (bufInfo, 1);

		if (seq_scaling_matrix_present_flag) {
			for (uint32_t ix = 0; ix < 8; ix++) {
				temp = bs_read_u (bufInfo, 1);
				if (temp) {
					scaling_list (ix, ix < 6 ? 16 : 64, bufInfo);
				}
			}
		}
	}

	bs_read_ue (bufInfo); /* log2_max_frame_num_minus4 */
	uint32_t pic_order_cnt_type = bs_read_ue (bufInfo);

	if (pic_order_cnt_type == 0) {
		bs_read_ue (bufInfo); /* log2_max_pic_order_cnt_lsb_minus4 */
	} else if (pic_order_cnt_type == 1) {
		bs_read_u (bufInfo, 1); /* delta_pic_order_always_zero_flag */
		h264_se (bufInfo); /* offset_for_non_ref_pic */
		h264_se (bufInfo); /* offset_for_top_to_bottom_field */
		temp = bs_read_ue (bufInfo);
		for (uint32_t ix = 0; ix < temp; ix++) {
			h264_se (bufInfo); /* offset_for_ref_frame[index] */
		}
	}
	bs_read_ue (bufInfo); /* num_ref_frames */
	bs_read_u (bufInfo, 1); /* gaps_in_frame_num_flag */
	uint32_t PicWidthInMbs = bs_read_ue (bufInfo) + 1;

	dec->pic_width = PicWidthInMbs * 16;
	uint32_t PicHeightInMapUnits = bs_read_ue (bufInfo) + 1;

	dec->pic_height = PicHeightInMapUnits * 16;
	uint32_t frame_mbs_only_flag = bs_read_u (bufInfo, 1);
	if (!frame_mbs_only_flag) {
		bs_read_u (bufInfo, 1); /* mb_adaptive_frame_field_flag */
	}
	bs_read_u (bufInfo, 1); /* direct_8x8_inference_flag */
	temp = bs_read_u (bufInfo, 1);
	if (temp) {
		bs_read_ue (bufInfo); /* frame_crop_left_offset */
		bs_read_ue (bufInfo); /* frame_crop_right_offset */
		bs_read_ue (bufInfo); /* frame_crop_top_offset */
		bs_read_ue (bufInfo); /* frame_crop_bottom_offset */
	}
	temp = bs_read_u (bufInfo, 1); /* VUI Parameters */
}

static void h264_slice_header (h264_decode * dec, bufferInfo * bufInfo)
{
	uint32_t temp;

	bs_read_ue (bufInfo); /* first_mb_in_slice */
	temp = bs_read_ue (bufInfo);
	dec->slice_type = ((temp > 5) ? (temp - 5) : temp);
}

static uint8_t h264_parse_nal (h264_decode * dec, bufferInfo * bufInfo)
{
	uint8_t type = 0;

	h264_check_0s (bufInfo, 1);
	dec->nal_ref_idc = bs_read_u (bufInfo, 2);
	dec->nal_unit_type = type = bs_read_u (bufInfo, 5);
	switch (type)
	{
	case H264_NAL_TYPE_NON_IDR_SLICE:
	case H264_NAL_TYPE_IDR_SLICE:
		h264_slice_header (dec, bufInfo);
		break;
	case H264_NAL_TYPE_SEQ_PARAM:
		h264_parse_sequence_parameter_set (dec, bufInfo);
		break;
	case H264_NAL_TYPE_PIC_PARAM:
	case H264_NAL_TYPE_SEI:
	case H264_NAL_TYPE_ACCESS_UNIT:
	case H264_NAL_TYPE_SEQ_EXTENSION:
		/* NOP */
		break;
	default:
		printf ("Unknown NAL type %d\n", type);
		break;
	}
	return type;
}

static uint32_t h264_find_next_start_code (uint8_t * pBuf, uint32_t bufLen)
{
	uint32_t val;
	uint32_t offset, startBytes;

	offset = startBytes = 0;
	if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 0 && pBuf[3] == 1) {
		pBuf += 4;
		offset = 4;
		startBytes = 1;
	} else if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 1) {
		pBuf += 3;
		offset = 3;
		startBytes = 1;
	}
	val = 0xffffffff;
	while (offset < bufLen - 3) {
		val <<= 8;
		val |= *pBuf++;
		offset++;
		if (val == H264_START_CODE)
			return offset - 4;

		if ((val & 0x00ffffff) == H264_START_CODE)
			return offset - 3;
	}
	if (bufLen - offset <= 3 && startBytes == 0) {
		startBytes = 0;
		return 0;
	}

	return offset;
}
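
/*
 * Walk the encoded bitstream NAL by NAL (stripping emulation-prevention
 * bytes first), parse the SPS and the slice header, and check that the
 * reported picture size and slice type match what the encode test asked for
 * (gWidth/gHeight/gSliceType).  Returns 0 on a match, -1 otherwise.
 */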

static int verify_checksum(uint8_t *buffer, uint32_t buffer_size)
{
	uint32_t buffer_pos = 0;
	int done = 0;
	h264_decode dec;

	memset(&dec, 0, sizeof(h264_decode));
	do {
		uint32_t ret;

		ret = h264_find_next_start_code (buffer + buffer_pos,
						 buffer_size - buffer_pos);
		if (ret == 0) {
			done = 1;
			if (buffer_pos == 0) {
				fprintf (stderr,
					"couldn't find start code in buffer from 0\n");
			}
		} else {
			/* have a complete NAL from buffer_pos to end */
			if (ret > 3) {
				uint32_t nal_len;
				bufferInfo bufinfo;

				nal_len = remove_03 (buffer + buffer_pos, ret);
				bufinfo.decBuffer = buffer + buffer_pos + (buffer[buffer_pos + 2] == 1 ? 3 : 4);
				bufinfo.decBufferSize = (nal_len - (buffer[buffer_pos + 2] == 1 ? 3 : 4)) * 8;
				bufinfo.end = buffer + buffer_pos + nal_len;
				bufinfo.numOfBitsInBuffer = 8;
				bufinfo.decData = *bufinfo.decBuffer;
				h264_parse_nal (&dec, &bufinfo);
			}
			buffer_pos += ret; /* buffer_pos points to next code */
		}
	} while (done == 0);

	if ((dec.pic_width == gWidth) &&
	    (dec.pic_height == gHeight) &&
	    (dec.slice_type == gSliceType))
		return 0;
	else
		return -1;
}

static void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_buf, int frame_type)
{
	uint32_t *fb_ptr;
	uint8_t *bs_ptr;
	uint32_t size;
	int r;
	/* uint64_t s[3] = {0, 1121279001727, 1059312481445}; */

	r = amdgpu_bo_cpu_map(fb_buf.handle, (void **)&fb_buf.ptr);
	CU_ASSERT_EQUAL(r, 0);
	fb_ptr = (uint32_t*)fb_buf.ptr;
	size = fb_ptr[6];
	r = amdgpu_bo_cpu_unmap(fb_buf.handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_cpu_map(bs_buf.handle, (void **)&bs_buf.ptr);
	CU_ASSERT_EQUAL(r, 0);

	bs_ptr = (uint8_t*)bs_buf.ptr;
	r = verify_checksum(bs_ptr, size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_cpu_unmap(bs_buf.handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_cs_vcn_ib_zero_count(int *len, int num)
{
	for (int i = 0; i < num; i++)
		ib_cpu[(*len)++] = 0;
}

static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
{
	struct amdgpu_vcn_bo bs_buf, fb_buf, input_buf;
	int len, r;
	unsigned width = 160, height = 128, buf_size;
	uint32_t *p_task_size = NULL;
	uint32_t task_offset = 0, st_offset;
	uint32_t *st_size = NULL;
	uint32_t fw_maj = 1, fw_min = 9;

	if (vcn_ip_version_major == 2) {
		fw_maj = 1;
		fw_min = 1;
	} else if (vcn_ip_version_major == 3) {
		fw_maj = 1;
		fw_min = 0;
	}
	gSliceType = frame_type;
	buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;

	num_resources = 0;
	alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
	alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
	alloc_resource(&input_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT);
	resources[num_resources++] = enc_buf.handle;
	resources[num_resources++] = cpb_buf.handle;
	resources[num_resources++] = bs_buf.handle;
	resources[num_resources++] = fb_buf.handle;
	resources[num_resources++] = input_buf.handle;
	resources[num_resources++] = ib_handle;

	r = amdgpu_bo_cpu_map(bs_buf.handle, (void**)&bs_buf.ptr);
	memset(bs_buf.ptr, 0, 4096);
	r = amdgpu_bo_cpu_unmap(bs_buf.handle);

	r = amdgpu_bo_cpu_map(fb_buf.handle, (void**)&fb_buf.ptr);
	memset(fb_buf.ptr, 0, 4096);
	r = amdgpu_bo_cpu_unmap(fb_buf.handle);

	r = amdgpu_bo_cpu_map(input_buf.handle, (void **)&input_buf.ptr);
	CU_ASSERT_EQUAL(r, 0);

	for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++)
		memcpy(input_buf.ptr + i * ALIGN(width, 256), frame + i * width, width);

	r = amdgpu_bo_cpu_unmap(input_buf.handle);
	CU_ASSERT_EQUAL(r, 0);

	len = 0;

	if (vcn_unified_ring)
		amdgpu_cs_sq_head(ib_cpu, &len, true);

	/* session info */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */
	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
	ib_cpu[len++] = enc_buf.addr >> 32;
	ib_cpu[len++] = enc_buf.addr;
	ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE */
	*st_size = (len - st_offset) * 4;

	/* task info */
	task_offset = len;
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */
	p_task_size = &ib_cpu[len++];
	ib_cpu[len++] = enc_task_id++; /* task_id */
	ib_cpu[len++] = 1; /* feedback */
	*st_size = (len - st_offset) * 4;

	if (frame_type == 2) {
		/* sps */
		st_offset = len;
		st_size = &ib_cpu[len++]; /* size */
		if (vcn_ip_version_major == 1)
			ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */
		else
			ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn */
		ib_cpu[len++] = 0x00000002; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */
		ib_cpu[len++] = 0x00000011; /* sps len */
		ib_cpu[len++] = 0x00000001; /* start code */
		ib_cpu[len++] = 0x6764440b;
		ib_cpu[len++] = 0xac54c284;
		ib_cpu[len++] = 0x68078442;
		ib_cpu[len++] = 0x37000000;
		*st_size = (len - st_offset) * 4;

		/* pps */
		st_offset = len;
		st_size = &ib_cpu[len++]; /* size */
		if (vcn_ip_version_major == 1)
			ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */
		else
			ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn */
		ib_cpu[len++] = 0x00000003; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */
		ib_cpu[len++] = 0x00000008; /* pps len */
		ib_cpu[len++] = 0x00000001; /* start code */
		ib_cpu[len++] = 0x68ce3c80;
		*st_size = (len - st_offset) * 4;
	}

	/* slice header */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	if (vcn_ip_version_major == 1)
		ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */
	else
		ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER other vcn */
	if (frame_type == 2) {
		ib_cpu[len++] = 0x65000000;
		ib_cpu[len++] = 0x11040000;
	} else {
		ib_cpu[len++] = 0x41000000;
		ib_cpu[len++] = 0x34210000;
	}
	ib_cpu[len++] = 0xe0000000;
	amdgpu_cs_vcn_ib_zero_count(&len, 13);

	ib_cpu[len++] = 0x00000001;
	ib_cpu[len++] = 0x00000008;
	ib_cpu[len++] = 0x00020000;
	ib_cpu[len++] = 0x00000000;
	ib_cpu[len++] = 0x00000001;
	ib_cpu[len++] = 0x00000015;
	ib_cpu[len++] = 0x00020001;
	ib_cpu[len++] = 0x00000000;
	ib_cpu[len++] = 0x00000001;
	ib_cpu[len++] = 0x00000003;
	amdgpu_cs_vcn_ib_zero_count(&len, 22);
	*st_size = (len - st_offset) * 4;

	/* encode params */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	if (vcn_ip_version_major == 1)
		ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1 */
	else
		ib_cpu[len++] = 0x0000000f; /* RENCODE_IB_PARAM_ENCODE_PARAMS other vcn */
	ib_cpu[len++] = frame_type;
	ib_cpu[len++] = 0x0001f000;
	ib_cpu[len++] = input_buf.addr >> 32;
	ib_cpu[len++] = input_buf.addr;
	ib_cpu[len++] = (input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32;
	ib_cpu[len++] = input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32);
	ib_cpu[len++] = 0x00000100;
	ib_cpu[len++] = 0x00000080;
	ib_cpu[len++] = 0x00000000;
	ib_cpu[len++] = 0xffffffff;
	ib_cpu[len++] = 0x00000000;
	*st_size = (len - st_offset) * 4;

	/* encode params h264 */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00200003; /* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */
	if (vcn_ip_version_major <= 2) {
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0xffffffff;
	} else {
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0xffffffff;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0xffffffff;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000001;
	}
	*st_size = (len - st_offset) * 4;

	/* encode context */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	if (vcn_ip_version_major == 1)
		ib_cpu[len++] = 0x0000000d; /* ENCODE_CONTEXT_BUFFER vcn 1 */
	else
		ib_cpu[len++] = 0x00000011; /* ENCODE_CONTEXT_BUFFER other vcn */
	ib_cpu[len++] = cpb_buf.addr >> 32;
	ib_cpu[len++] = cpb_buf.addr;
	ib_cpu[len++] = 0x00000000; /* swizzle mode */
	ib_cpu[len++] = 0x00000100; /* luma pitch */
	ib_cpu[len++] = 0x00000100; /* chroma pitch */
	ib_cpu[len++] = 0x00000002; /* number of reconstructed pictures */
	ib_cpu[len++] = 0x00000000; /* reconstructed pic 1 luma offset */
	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32); /* pic1 chroma offset */
	if (vcn_ip_version_major == 4)
		amdgpu_cs_vcn_ib_zero_count(&len, 2);
	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; /* pic2 luma offset */
	ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2; /* pic2 chroma offset */

	amdgpu_cs_vcn_ib_zero_count(&len, 280);
	*st_size = (len - st_offset) * 4;

	/* bitstream buffer */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	if (vcn_ip_version_major == 1)
		ib_cpu[len++] = 0x0000000e; /* VIDEO_BITSTREAM_BUFFER vcn 1 */
	else
		ib_cpu[len++] = 0x00000012; /* VIDEO_BITSTREAM_BUFFER other vcn */

	ib_cpu[len++] = 0x00000000; /* mode */
	ib_cpu[len++] = bs_buf.addr >> 32;
	ib_cpu[len++] = bs_buf.addr;
	ib_cpu[len++] = 0x0001f000;
	ib_cpu[len++] = 0x00000000;
	*st_size = (len - st_offset) * 4;

	/* feedback */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	if (vcn_ip_version_major == 1)
		ib_cpu[len++] = 0x00000010; /* FEEDBACK_BUFFER vcn 1 */
	else
		ib_cpu[len++] = 0x00000015; /* FEEDBACK_BUFFER other vcn */
	ib_cpu[len++] = 0x00000000;
	ib_cpu[len++] = fb_buf.addr >> 32;
	ib_cpu[len++] = fb_buf.addr;
	ib_cpu[len++] = 0x00000010;
	ib_cpu[len++] = 0x00000028;
	*st_size = (len - st_offset) * 4;

	/* intra refresh */
	st_offset = len;
	st_size = &ib_cpu[len++];
	if (vcn_ip_version_major == 1)
		ib_cpu[len++] = 0x0000000c; /* INTRA_REFRESH vcn 1 */
	else
		ib_cpu[len++] = 0x00000010; /* INTRA_REFRESH other vcn */
	ib_cpu[len++] = 0x00000000;
	ib_cpu[len++] = 0x00000000;
	ib_cpu[len++] = 0x00000000;
	*st_size = (len - st_offset) * 4;

	if (vcn_ip_version_major != 1) {
		/* Input Format */
		st_offset = len;
		st_size = &ib_cpu[len++];
		ib_cpu[len++] = 0x0000000c;
		ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */
		ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_PACKING_FORMAT_NV12 */
		*st_size = (len - st_offset) * 4;

		/* Output Format */
		st_offset = len;
		st_size = &ib_cpu[len++];
		ib_cpu[len++] = 0x0000000d;
		ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000;
		ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */
		*st_size = (len - st_offset) * 4;
	}

	/* op_speed */
	st_offset = len;
	st_size = &ib_cpu[len++];
	ib_cpu[len++] = 0x01000006; /* SPEED_ENCODING_MODE */
	*st_size = (len - st_offset) * 4;

	/* op_enc */
	st_offset = len;
	st_size = &ib_cpu[len++];
	ib_cpu[len++] = 0x01000003;
	*st_size = (len - st_offset) * 4;

	*p_task_size = (len - task_offset) * 4;

	if (vcn_unified_ring)
		amdgpu_cs_sq_ib_tail(ib_cpu + len);

	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
	CU_ASSERT_EQUAL(r, 0);

	/* check result */
	check_result(fb_buf, bs_buf, frame_type);

	free_resource(&fb_buf);
	free_resource(&bs_buf);
	free_resource(&input_buf);
}

static void amdgpu_cs_vcn_enc_encode(void)
{
	amdgpu_cs_vcn_enc_encode_frame(2); /* IDR frame */
}

static void amdgpu_cs_vcn_enc_destroy(void)
{
	int len = 0, r;
	uint32_t *p_task_size = NULL;
	uint32_t task_offset = 0, st_offset;
	uint32_t *st_size = NULL;
	uint32_t fw_maj = 1, fw_min = 9;

	if (vcn_ip_version_major == 2) {
		fw_maj = 1;
		fw_min = 1;
	} else if (vcn_ip_version_major == 3) {
		fw_maj = 1;
		fw_min = 0;
	}

	num_resources = 0;
	/* alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); */
	resources[num_resources++] = enc_buf.handle;
	resources[num_resources++] = ib_handle;

	if (vcn_unified_ring)
		amdgpu_cs_sq_head(ib_cpu, &len, true);

	/* session info */
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */
	ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
	ib_cpu[len++] = enc_buf.addr >> 32;
	ib_cpu[len++] = enc_buf.addr;
	ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE */
	*st_size = (len - st_offset) * 4;

	/* task info */
	task_offset = len;
	st_offset = len;
	st_size = &ib_cpu[len++]; /* size */
	ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */
	p_task_size = &ib_cpu[len++];
	ib_cpu[len++] = enc_task_id++; /* task_id */
	ib_cpu[len++] = 0; /* feedback */
	*st_size = (len - st_offset) * 4;

	/* op close */
	st_offset = len;
	st_size = &ib_cpu[len++];
	ib_cpu[len++] = 0x01000002; /* RENCODE_IB_OP_CLOSE_SESSION */
	*st_size = (len - st_offset) * 4;

	*p_task_size = (len - task_offset) * 4;

	if (vcn_unified_ring)
		amdgpu_cs_sq_ib_tail(ib_cpu + len);

	r = submit(len, AMDGPU_HW_IP_VCN_ENC);
	CU_ASSERT_EQUAL(r, 0);

	free_resource(&cpb_buf);
	free_resource(&enc_buf);
}