From ba42de95da4312230b16c25a0ce1e75d8839f9c1 Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Sun, 13 Sep 2020 20:42:47 -0400 Subject: [PATCH] radeon/vcn: support hevc SAO enc for VCN2+ Add support of sample adaptive offset (SAO) in HEVC encode for VCN2 and above. Signed-off-by: Ruijing Dong Acked-by: Leo Liu Part-of: --- src/gallium/drivers/radeon/radeon_vcn_enc.h | 4 +- .../drivers/radeon/radeon_vcn_enc_1_2.c | 4 +- .../drivers/radeon/radeon_vcn_enc_2_0.c | 147 +++++++++++++++++- 3 files changed, 150 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h b/src/gallium/drivers/radeon/radeon_vcn_enc.h index a881042833a..8058f9ab4f6 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc.h +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h @@ -82,6 +82,8 @@ #define RENCODE_HEVC_HEADER_INSTRUCTION_FIRST_SLICE 0x00010001 #define RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_SEGMENT 0x00010002 #define RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_QP_DELTA 0x00010003 +#define RENCODE_HEVC_HEADER_INSTRUCTION_SAO_ENABLE 0x00010004 +#define RENCODE_HEVC_HEADER_INSTRUCTION_LOOP_FILTER_ACROSS_SLICES_ENABLE 0x00010005 #define RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB 0x00020000 #define RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA 0x00020001 @@ -501,7 +503,7 @@ struct radeon_encoder { void (*op_enc)(struct radeon_encoder *enc); void (*op_init_rc)(struct radeon_encoder *enc); void (*op_init_rc_vbv)(struct radeon_encoder *enc); - void (*op_speed)(struct radeon_encoder *enc); + void (*op_preset)(struct radeon_encoder *enc); void (*encode_headers)(struct radeon_encoder *enc); void (*input_format)(struct radeon_encoder *enc); void (*output_format)(struct radeon_encoder *enc); diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c index a267e197bc3..1b8cf798254 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c @@ -1156,7 +1156,7 @@ static void encode(struct radeon_encoder *enc) enc->feedback(enc); enc->intra_refresh(enc); - enc->op_speed(enc); + enc->op_preset(enc); enc->op_enc(enc); *enc->p_task_size = (enc->total_task_size); } @@ -1193,7 +1193,7 @@ void radeon_enc_1_2_init(struct radeon_encoder *enc) enc->op_enc = radeon_enc_op_enc; enc->op_init_rc = radeon_enc_op_init_rc; enc->op_init_rc_vbv = radeon_enc_op_init_rc_vbv; - enc->op_speed = radeon_enc_op_speed; + enc->op_preset = radeon_enc_op_speed; if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC) { enc->session_init = radeon_enc_session_init; diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c b/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c index 56c6bf9a478..b8dd69ae802 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_2_0.c @@ -73,6 +73,147 @@ #define RENCODE_COLOR_PACKING_FORMAT_NV12 0 #define RENCODE_COLOR_PACKING_FORMAT_P010 1 +static void radeon_enc_op_balance(struct radeon_encoder *enc) +{ + RADEON_ENC_BEGIN(RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE); + RADEON_ENC_END(); +} + +static void radeon_enc_slice_header_hevc(struct radeon_encoder *enc) +{ + uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; + uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0}; + unsigned int inst_index = 0; + unsigned int cdw_start = 0; + unsigned int cdw_filled = 0; + unsigned int bits_copied = 0; + RADEON_ENC_BEGIN(enc->cmd.slice_header); + radeon_enc_reset(enc); + radeon_enc_set_emulation_prevention(enc, false); + + cdw_start = enc->cs->current.cdw; + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6); + radeon_enc_code_fixed_bits(enc, 0x0, 6); + radeon_enc_code_fixed_bits(enc, 0x1, 3); + + radeon_enc_flush_headers(enc); + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_FIRST_SLICE; + inst_index++; + + if ((enc->enc_pic.nal_unit_type >= 16) && (enc->enc_pic.nal_unit_type <= 23)) + radeon_enc_code_fixed_bits(enc, 0x0, 1); + + radeon_enc_code_ue(enc, 0x0); + + radeon_enc_flush_headers(enc); + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_SEGMENT; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_DEPENDENT_SLICE_END; + inst_index++; + + switch (enc->enc_pic.picture_type) { + case PIPE_H265_ENC_PICTURE_TYPE_I: + case PIPE_H265_ENC_PICTURE_TYPE_IDR: + radeon_enc_code_ue(enc, 0x2); + break; + case PIPE_H265_ENC_PICTURE_TYPE_P: + case PIPE_H265_ENC_PICTURE_TYPE_SKIP: + radeon_enc_code_ue(enc, 0x1); + break; + case PIPE_H265_ENC_PICTURE_TYPE_B: + radeon_enc_code_ue(enc, 0x0); + break; + default: + radeon_enc_code_ue(enc, 0x1); + } + + if ((enc->enc_pic.nal_unit_type != 19) && (enc->enc_pic.nal_unit_type != 20)) { + radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt, enc->enc_pic.log2_max_poc); + if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) + radeon_enc_code_fixed_bits(enc, 0x1, 1); + else { + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_ue(enc, 0x0); + radeon_enc_code_ue(enc, 0x0); + } + } + + if (enc->enc_pic.sample_adaptive_offset_enabled_flag) { + radeon_enc_flush_headers(enc); + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_SAO_ENABLE; + inst_index++; + } + + if ((enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) || + (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)) { + radeon_enc_code_fixed_bits(enc, 0x0, 1); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_spec_misc.cabac_init_flag, 1); + radeon_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand); + } + + radeon_enc_flush_headers(enc); + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_QP_DELTA; + inst_index++; + + if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) && + (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled || + enc->enc_pic.sample_adaptive_offset_enabled_flag)) { + if (enc->enc_pic.sample_adaptive_offset_enabled_flag) { + radeon_enc_flush_headers(enc); + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + + instruction[inst_index] = RENCODE_HEVC_HEADER_INSTRUCTION_LOOP_FILTER_ACROSS_SLICES_ENABLE; + inst_index++; + } + else + radeon_enc_code_fixed_bits(enc, enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled, 1); + } + + radeon_enc_flush_headers(enc); + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY; + num_bits[inst_index] = enc->bits_output - bits_copied; + bits_copied = enc->bits_output; + inst_index++; + instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END; + + cdw_filled = enc->cs->current.cdw - cdw_start; + for (int i = 0; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS - cdw_filled; i++) + RADEON_ENC_CS(0x00000000); + + for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) { + RADEON_ENC_CS(instruction[j]); + RADEON_ENC_CS(num_bits[j]); + } + + RADEON_ENC_END(); +} + static void radeon_enc_quality_params(struct radeon_encoder *enc) { enc->enc_pic.quality_params.vbaq_mode = 0; @@ -97,7 +238,7 @@ static void radeon_enc_loop_filter_hevc(struct radeon_encoder *enc) RADEON_ENC_CS(enc->enc_pic.hevc_deblock.tc_offset_div2); RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cb_qp_offset); RADEON_ENC_CS(enc->enc_pic.hevc_deblock.cr_qp_offset); - RADEON_ENC_CS(1); + RADEON_ENC_CS(!enc->enc_pic.sample_adaptive_offset_enabled_flag); RADEON_ENC_END(); } @@ -346,7 +487,7 @@ static void encode(struct radeon_encoder *enc) enc->input_format(enc); enc->output_format(enc); - enc->op_speed(enc); + enc->op_preset(enc); enc->op_enc(enc); *enc->p_task_size = (enc->total_task_size); } @@ -364,6 +505,8 @@ void radeon_enc_2_0_init(struct radeon_encoder *enc) enc->deblocking_filter = radeon_enc_loop_filter_hevc; enc->nalu_sps = radeon_enc_nalu_sps_hevc; enc->nalu_pps = radeon_enc_nalu_pps_hevc; + enc->slice_header = radeon_enc_slice_header_hevc; + enc->op_preset = radeon_enc_op_balance; } enc->cmd.session_info = RENCODE_IB_PARAM_SESSION_INFO;