From 30991bc414cf575816bcd76bec746bd7dce917e0 Mon Sep 17 00:00:00 2001 From: Tobias Schramm Date: Wed, 4 Mar 2020 20:05:58 +0100 Subject: [PATCH] staging/media: hantro: Enable rk3399 h264 support Probably not complete without some v4l patches --- .../media/uapi/v4l/ext-ctrls-codec.rst | 12 + drivers/staging/media/hantro/Makefile | 2 +- drivers/staging/media/hantro/hantro.h | 66 +- drivers/staging/media/hantro/hantro_drv.c | 11 +- .../staging/media/hantro/hantro_g1_h264_dec.c | 670 +++++++++++------- .../media/hantro/hantro_g1_mpeg2_dec.c | 12 +- drivers/staging/media/hantro/hantro_g1_regs.h | 53 -- .../staging/media/hantro/hantro_g1_vp8_dec.c | 10 +- .../staging/media/hantro/hantro_h1_jpeg_enc.c | 4 +- drivers/staging/media/hantro/hantro_h264.c | 138 ++-- drivers/staging/media/hantro/hantro_hw.h | 22 +- drivers/staging/media/hantro/hantro_v4l2.c | 109 +-- drivers/staging/media/hantro/hantro_vp8.c | 4 +- drivers/staging/media/hantro/rk3288_vpu_hw.c | 10 - drivers/staging/media/hantro/rk3399_vpu_hw.c | 24 +- .../media/hantro/rk3399_vpu_hw_h264_dec.c | 493 +++++++++++++ .../media/hantro/rk3399_vpu_hw_jpeg_enc.c | 4 +- .../media/hantro/rk3399_vpu_hw_mpeg2_dec.c | 10 +- .../media/hantro/rk3399_vpu_hw_vp8_dec.c | 8 +- include/media/h264-ctrls.h | 4 + 20 files changed, 1125 insertions(+), 541 deletions(-) create mode 100644 drivers/staging/media/hantro/rk3399_vpu_hw_h264_dec.c diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst index 28313c0f4e7cc..d472a54d1c4dc 100644 --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst @@ -2028,6 +2028,18 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - * - ``V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM`` - 0x00000004 - The DPB entry is a long term reference frame + * - ``V4L2_H264_DPB_ENTRY_FLAG_FIELD_PICTURE`` + - 0x00000008 + - The DPB entry is a field picture + * - ``V4L2_H264_DPB_ENTRY_FLAG_REF_TOP`` + - 0x00000010 + - The DPB entry is a top field reference + * - ``V4L2_H264_DPB_ENTRY_FLAG_REF_BOTTOM`` + - 0x00000020 + - The DPB entry is a bottom field reference + * - ``V4L2_H264_DPB_ENTRY_FLAG_REF_FRAME`` + - 0x00000030 + - The DPB entry is a reference frame ``V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE (enum)`` Specifies the decoding mode to use. Currently exposes slice-based and diff --git a/drivers/staging/media/hantro/Makefile b/drivers/staging/media/hantro/Makefile index 496b30c3c396c..8d33b0e8aa6cb 100644 --- a/drivers/staging/media/hantro/Makefile +++ b/drivers/staging/media/hantro/Makefile @@ -3,12 +3,12 @@ obj-$(CONFIG_VIDEO_HANTRO) += hantro-vpu.o hantro-vpu-y += \ hantro_drv.o \ hantro_v4l2.o \ - hantro_postproc.o \ hantro_h1_jpeg_enc.o \ hantro_g1_h264_dec.o \ hantro_g1_mpeg2_dec.o \ hantro_g1_vp8_dec.o \ rk3399_vpu_hw_jpeg_enc.o \ + rk3399_vpu_hw_h264_dec.o \ rk3399_vpu_hw_mpeg2_dec.o \ rk3399_vpu_hw_vp8_dec.o \ hantro_jpeg.o \ diff --git a/drivers/staging/media/hantro/hantro.h b/drivers/staging/media/hantro/hantro.h index b0faa43b3f79c..deb90ae37859a 100644 --- a/drivers/staging/media/hantro/hantro.h +++ b/drivers/staging/media/hantro/hantro.h @@ -60,8 +60,6 @@ struct hantro_irq { * @num_enc_fmts: Number of encoder formats. * @dec_fmts: Decoder formats. * @num_dec_fmts: Number of decoder formats. - * @postproc_fmts: Post-processor formats. - * @num_postproc_fmts: Number of post-processor formats. * @codec: Supported codecs * @codec_ops: Codec ops. * @init: Initialize hardware. @@ -72,7 +70,6 @@ struct hantro_irq { * @num_clocks: number of clocks in the array * @reg_names: array of register range names * @num_regs: number of register range names in the array - * @postproc_regs: &struct hantro_postproc_regs pointer */ struct hantro_variant { unsigned int enc_offset; @@ -81,8 +78,6 @@ struct hantro_variant { unsigned int num_enc_fmts; const struct hantro_fmt *dec_fmts; unsigned int num_dec_fmts; - const struct hantro_fmt *postproc_fmts; - unsigned int num_postproc_fmts; unsigned int codec; const struct hantro_codec_ops *codec_ops; int (*init)(struct hantro_dev *vpu); @@ -93,7 +88,6 @@ struct hantro_variant { int num_clocks; const char * const *reg_names; int num_regs; - const struct hantro_postproc_regs *postproc_regs; }; /** @@ -219,7 +213,6 @@ struct hantro_dev { * context, and it's called right before * calling v4l2_m2m_job_finish. * @codec_ops: Set of operations related to codec mode. - * @postproc: Post-processing context. * @jpeg_enc: JPEG-encoding context. * @mpeg2_dec: MPEG-2-decoding context. * @vp8_dec: VP8-decoding context. @@ -244,7 +237,6 @@ struct hantro_ctx { unsigned int bytesused); const struct hantro_codec_ops *codec_ops; - struct hantro_postproc_ctx postproc; /* Specific for particular codec modes. */ union { @@ -282,23 +274,6 @@ struct hantro_reg { u32 mask; }; -struct hantro_postproc_regs { - struct hantro_reg pipeline_en; - struct hantro_reg max_burst; - struct hantro_reg clk_gate; - struct hantro_reg out_swap32; - struct hantro_reg out_endian; - struct hantro_reg out_luma_base; - struct hantro_reg input_width; - struct hantro_reg input_height; - struct hantro_reg output_width; - struct hantro_reg output_height; - struct hantro_reg input_fmt; - struct hantro_reg output_fmt; - struct hantro_reg orig_width; - struct hantro_reg display_width; -}; - /* Logging helpers */ /** @@ -377,30 +352,16 @@ static inline u32 vdpu_read(struct hantro_dev *vpu, u32 reg) return val; } -static inline u32 vdpu_read_mask(struct hantro_dev *vpu, - const struct hantro_reg *reg, - u32 val) +static inline void hantro_reg_write(struct hantro_dev *vpu, + const struct hantro_reg *reg, + u32 val) { u32 v; v = vdpu_read(vpu, reg->base); v &= ~(reg->mask << reg->shift); v |= ((val & reg->mask) << reg->shift); - return v; -} - -static inline void hantro_reg_write(struct hantro_dev *vpu, - const struct hantro_reg *reg, - u32 val) -{ - vdpu_write_relaxed(vpu, vdpu_read_mask(vpu, reg, val), reg->base); -} - -static inline void hantro_reg_write_s(struct hantro_dev *vpu, - const struct hantro_reg *reg, - u32 val) -{ - vdpu_write(vpu, vdpu_read_mask(vpu, reg, val), reg->base); + vdpu_write_relaxed(vpu, v, reg->base); } bool hantro_is_encoder_ctx(const struct hantro_ctx *ctx); @@ -420,23 +381,4 @@ hantro_get_dst_buf(struct hantro_ctx *ctx) return v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); } -static inline bool -hantro_needs_postproc(struct hantro_ctx *ctx, const struct hantro_fmt *fmt) -{ - return fmt->fourcc != V4L2_PIX_FMT_NV12; -} - -static inline dma_addr_t -hantro_get_dec_buf_addr(struct hantro_ctx *ctx, struct vb2_buffer *vb) -{ - if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt)) - return ctx->postproc.dec_q[vb->index].dma; - return vb2_dma_contig_plane_dma_addr(vb, 0); -} - -void hantro_postproc_disable(struct hantro_ctx *ctx); -void hantro_postproc_enable(struct hantro_ctx *ctx); -void hantro_postproc_free(struct hantro_ctx *ctx); -int hantro_postproc_alloc(struct hantro_ctx *ctx); - #endif /* HANTRO_H_ */ diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c index 97c615a2f057f..26108c96b674b 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -53,7 +53,7 @@ dma_addr_t hantro_get_ref(struct hantro_ctx *ctx, u64 ts) if (index < 0) return 0; buf = vb2_get_buffer(q, index); - return hantro_get_dec_buf_addr(ctx, buf); + return vb2_dma_contig_plane_dma_addr(buf, 0); } static int @@ -152,21 +152,16 @@ void hantro_watchdog(struct work_struct *work) } } -void hantro_start_prepare_run(struct hantro_ctx *ctx) +void hantro_prepare_run(struct hantro_ctx *ctx) { struct vb2_v4l2_buffer *src_buf; src_buf = hantro_get_src_buf(ctx); v4l2_ctrl_request_setup(src_buf->vb2_buf.req_obj.req, &ctx->ctrl_handler); - - if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt)) - hantro_postproc_enable(ctx); - else - hantro_postproc_disable(ctx); } -void hantro_end_prepare_run(struct hantro_ctx *ctx) +void hantro_finish_run(struct hantro_ctx *ctx) { struct vb2_v4l2_buffer *src_buf; diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c index 424c648ce9fc4..aeefa652e5a9c 100644 --- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Rockchip RK3288 VPU codec driver + * Hantro VPU codec driver * * Copyright (c) 2014 Rockchip Electronics Co., Ltd. * Hertz Wong @@ -15,248 +15,430 @@ #include -#include "hantro_g1_regs.h" #include "hantro_hw.h" #include "hantro_v4l2.h" -static void set_params(struct hantro_ctx *ctx) +#define G1_SWREG(nr) ((nr) * 4) + +#define G1_REG_RLC_VLC_BASE G1_SWREG(12) +#define G1_REG_DEC_OUT_BASE G1_SWREG(13) +#define G1_REG_REFER0_BASE G1_SWREG(14) +#define G1_REG_REFER1_BASE G1_SWREG(15) +#define G1_REG_REFER2_BASE G1_SWREG(16) +#define G1_REG_REFER3_BASE G1_SWREG(17) +#define G1_REG_REFER4_BASE G1_SWREG(18) +#define G1_REG_REFER5_BASE G1_SWREG(19) +#define G1_REG_REFER6_BASE G1_SWREG(20) +#define G1_REG_REFER7_BASE G1_SWREG(21) +#define G1_REG_REFER8_BASE G1_SWREG(22) +#define G1_REG_REFER9_BASE G1_SWREG(23) +#define G1_REG_REFER10_BASE G1_SWREG(24) +#define G1_REG_REFER11_BASE G1_SWREG(25) +#define G1_REG_REFER12_BASE G1_SWREG(26) +#define G1_REG_REFER13_BASE G1_SWREG(27) +#define G1_REG_REFER14_BASE G1_SWREG(28) +#define G1_REG_REFER15_BASE G1_SWREG(29) +#define G1_REG_QTABLE_BASE G1_SWREG(40) +#define G1_REG_DIR_MV_BASE G1_SWREG(41) +#define G1_REG_DEC_E(v) ((v) ? BIT(0) : 0) + +#define G1_REG_DEC_AXI_RD_ID(v) (((v) << 24) & GENMASK(31, 24)) +#define G1_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(23) : 0) +#define G1_REG_DEC_STRSWAP32_E(v) ((v) ? BIT(22) : 0) +#define G1_REG_DEC_STRENDIAN_E(v) ((v) ? BIT(21) : 0) +#define G1_REG_DEC_INSWAP32_E(v) ((v) ? BIT(20) : 0) +#define G1_REG_DEC_OUTSWAP32_E(v) ((v) ? BIT(19) : 0) +#define G1_REG_DEC_DATA_DISC_E(v) ((v) ? BIT(18) : 0) +#define G1_REG_DEC_LATENCY(v) (((v) << 11) & GENMASK(16, 11)) +#define G1_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(10) : 0) +#define G1_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(9) : 0) +#define G1_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(8) : 0) +#define G1_REG_DEC_ADV_PRE_DIS(v) ((v) ? BIT(6) : 0) +#define G1_REG_DEC_SCMD_DIS(v) ((v) ? BIT(5) : 0) +#define G1_REG_DEC_MAX_BURST(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_DEC_MODE(v) (((v) << 28) & GENMASK(31, 28)) +#define G1_REG_RLC_MODE_E(v) ((v) ? BIT(27) : 0) +#define G1_REG_PIC_INTERLACE_E(v) ((v) ? BIT(23) : 0) +#define G1_REG_PIC_FIELDMODE_E(v) ((v) ? BIT(22) : 0) +#define G1_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(19) : 0) +#define G1_REG_FILTERING_DIS(v) ((v) ? BIT(14) : 0) +#define G1_REG_PIC_FIXED_QUANT(v) ((v) ? BIT(13) : 0) +#define G1_REG_WRITE_MVS_E(v) ((v) ? BIT(12) : 0) +#define G1_REG_SEQ_MBAFF_E(v) ((v) ? BIT(10) : 0) +#define G1_REG_PICORD_COUNT_E(v) ((v) ? BIT(9) : 0) +#define G1_REG_DEC_AXI_WR_ID(v) (((v) << 0) & GENMASK(7, 0)) + +#define G1_REG_PIC_MB_WIDTH(v) (((v) << 23) & GENMASK(31, 23)) +#define G1_REG_PIC_MB_HEIGHT_P(v) (((v) << 11) & GENMASK(18, 11)) +#define G1_REG_REF_FRAMES(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_STRM_START_BIT(v) (((v) << 26) & GENMASK(31, 26)) +#define G1_REG_TYPE1_QUANT_E(v) ((v) ? BIT(24) : 0) +#define G1_REG_CH_QP_OFFSET(v) (((v) << 19) & GENMASK(23, 19)) +#define G1_REG_CH_QP_OFFSET2(v) (((v) << 14) & GENMASK(18, 14)) +#define G1_REG_FIELDPIC_FLAG_E(v) ((v) ? BIT(0) : 0) + +#define G1_REG_START_CODE_E(v) ((v) ? BIT(31) : 0) +#define G1_REG_INIT_QP(v) (((v) << 25) & GENMASK(30, 25)) +#define G1_REG_CH_8PIX_ILEAV_E(v) ((v) ? BIT(24) : 0) +#define G1_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0)) + +#define G1_REG_CABAC_E(v) ((v) ? BIT(31) : 0) +#define G1_REG_BLACKWHITE_E(v) ((v) ? BIT(30) : 0) +#define G1_REG_DIR_8X8_INFER_E(v) ((v) ? BIT(29) : 0) +#define G1_REG_WEIGHT_PRED_E(v) ((v) ? BIT(28) : 0) +#define G1_REG_WEIGHT_BIPR_IDC(v) (((v) << 26) & GENMASK(27, 26)) +#define G1_REG_FRAMENUM_LEN(v) (((v) << 16) & GENMASK(20, 16)) +#define G1_REG_FRAMENUM(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_CONST_INTRA_E(v) ((v) ? BIT(31) : 0) +#define G1_REG_FILT_CTRL_PRES(v) ((v) ? BIT(30) : 0) +#define G1_REG_RDPIC_CNT_PRES(v) ((v) ? BIT(29) : 0) +#define G1_REG_8X8TRANS_FLAG_E(v) ((v) ? BIT(28) : 0) +#define G1_REG_REFPIC_MK_LEN(v) (((v) << 17) & GENMASK(27, 17)) +#define G1_REG_IDR_PIC_E(v) ((v) ? BIT(16) : 0) +#define G1_REG_IDR_PIC_ID(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_PPS_ID(v) (((v) << 24) & GENMASK(31, 24)) +#define G1_REG_REFIDX1_ACTIVE(v) (((v) << 19) & GENMASK(23, 19)) +#define G1_REG_REFIDX0_ACTIVE(v) (((v) << 14) & GENMASK(18, 14)) +#define G1_REG_POC_LENGTH(v) (((v) << 0) & GENMASK(7, 0)) + +#define G1_REG_PINIT_RLIST_F9(v) (((v) << 25) & GENMASK(29, 25)) +#define G1_REG_PINIT_RLIST_F8(v) (((v) << 20) & GENMASK(24, 20)) +#define G1_REG_PINIT_RLIST_F7(v) (((v) << 15) & GENMASK(19, 15)) +#define G1_REG_PINIT_RLIST_F6(v) (((v) << 10) & GENMASK(14, 10)) +#define G1_REG_PINIT_RLIST_F5(v) (((v) << 5) & GENMASK(9, 5)) +#define G1_REG_PINIT_RLIST_F4(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_PINIT_RLIST_F15(v) (((v) << 25) & GENMASK(29, 25)) +#define G1_REG_PINIT_RLIST_F14(v) (((v) << 20) & GENMASK(24, 20)) +#define G1_REG_PINIT_RLIST_F13(v) (((v) << 15) & GENMASK(19, 15)) +#define G1_REG_PINIT_RLIST_F12(v) (((v) << 10) & GENMASK(14, 10)) +#define G1_REG_PINIT_RLIST_F11(v) (((v) << 5) & GENMASK(9, 5)) +#define G1_REG_PINIT_RLIST_F10(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_REFER1_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define G1_REG_REFER0_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_REFER3_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define G1_REG_REFER2_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_REFER5_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define G1_REG_REFER4_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_REFER7_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define G1_REG_REFER6_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_REFER9_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define G1_REG_REFER8_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_REFER11_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define G1_REG_REFER10_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_REFER13_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define G1_REG_REFER12_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_REFER15_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define G1_REG_REFER14_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define G1_REG_REFER_LTERM_E(v) (((v) << 0) & GENMASK(31, 0)) + +#define G1_REG_REFER_VALID_E(v) (((v) << 0) & GENMASK(31, 0)) + +#define G1_REG_BINIT_RLIST_B2(v) (((v) << 25) & GENMASK(29, 25)) +#define G1_REG_BINIT_RLIST_F2(v) (((v) << 20) & GENMASK(24, 20)) +#define G1_REG_BINIT_RLIST_B1(v) (((v) << 15) & GENMASK(19, 15)) +#define G1_REG_BINIT_RLIST_F1(v) (((v) << 10) & GENMASK(14, 10)) +#define G1_REG_BINIT_RLIST_B0(v) (((v) << 5) & GENMASK(9, 5)) +#define G1_REG_BINIT_RLIST_F0(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_BINIT_RLIST_B5(v) (((v) << 25) & GENMASK(29, 25)) +#define G1_REG_BINIT_RLIST_F5(v) (((v) << 20) & GENMASK(24, 20)) +#define G1_REG_BINIT_RLIST_B4(v) (((v) << 15) & GENMASK(19, 15)) +#define G1_REG_BINIT_RLIST_F4(v) (((v) << 10) & GENMASK(14, 10)) +#define G1_REG_BINIT_RLIST_B3(v) (((v) << 5) & GENMASK(9, 5)) +#define G1_REG_BINIT_RLIST_F3(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_BINIT_RLIST_B8(v) (((v) << 25) & GENMASK(29, 25)) +#define G1_REG_BINIT_RLIST_F8(v) (((v) << 20) & GENMASK(24, 20)) +#define G1_REG_BINIT_RLIST_B7(v) (((v) << 15) & GENMASK(19, 15)) +#define G1_REG_BINIT_RLIST_F7(v) (((v) << 10) & GENMASK(14, 10)) +#define G1_REG_BINIT_RLIST_B6(v) (((v) << 5) & GENMASK(9, 5)) +#define G1_REG_BINIT_RLIST_F6(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_BINIT_RLIST_B11(v) (((v) << 25) & GENMASK(29, 25)) +#define G1_REG_BINIT_RLIST_F11(v) (((v) << 20) & GENMASK(24, 20)) +#define G1_REG_BINIT_RLIST_B10(v) (((v) << 15) & GENMASK(19, 15)) +#define G1_REG_BINIT_RLIST_F10(v) (((v) << 10) & GENMASK(14, 10)) +#define G1_REG_BINIT_RLIST_B9(v) (((v) << 5) & GENMASK(9, 5)) +#define G1_REG_BINIT_RLIST_F9(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_BINIT_RLIST_B14(v) (((v) << 25) & GENMASK(29, 25)) +#define G1_REG_BINIT_RLIST_F14(v) (((v) << 20) & GENMASK(24, 20)) +#define G1_REG_BINIT_RLIST_B13(v) (((v) << 15) & GENMASK(19, 15)) +#define G1_REG_BINIT_RLIST_F13(v) (((v) << 10) & GENMASK(14, 10)) +#define G1_REG_BINIT_RLIST_B12(v) (((v) << 5) & GENMASK(9, 5)) +#define G1_REG_BINIT_RLIST_F12(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_PINIT_RLIST_F3(v) (((v) << 25) & GENMASK(29, 25)) +#define G1_REG_PINIT_RLIST_F2(v) (((v) << 20) & GENMASK(24, 20)) +#define G1_REG_PINIT_RLIST_F1(v) (((v) << 15) & GENMASK(19, 15)) +#define G1_REG_PINIT_RLIST_F0(v) (((v) << 10) & GENMASK(14, 10)) +#define G1_REG_BINIT_RLIST_B15(v) (((v) << 5) & GENMASK(9, 5)) +#define G1_REG_BINIT_RLIST_F15(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_STARTMB_X(v) (((v) << 23) & GENMASK(31, 23)) +#define G1_REG_STARTMB_Y(v) (((v) << 15) & GENMASK(22, 15)) + +#define G1_REG_PRED_BC_TAP_0_0(v) (((v) << 22) & GENMASK(31, 22)) +#define G1_REG_PRED_BC_TAP_0_1(v) (((v) << 12) & GENMASK(21, 12)) +#define G1_REG_PRED_BC_TAP_0_2(v) (((v) << 2) & GENMASK(11, 2)) + +#define G1_REG_REFBU_E(v) ((v) ? BIT(31) : 0) + +#define G1_REG_APF_THRESHOLD(v) (((v) << 0) & GENMASK(13, 0)) + +void hantro_g1_h264_dec_run(struct hantro_ctx *ctx) { - const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; - const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; - const struct v4l2_ctrl_h264_slice_params *slices = ctrls->slices; - const struct v4l2_ctrl_h264_sps *sps = ctrls->sps; - const struct v4l2_ctrl_h264_pps *pps = ctrls->pps; - struct vb2_v4l2_buffer *src_buf = hantro_get_src_buf(ctx); struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + const struct hantro_h264_dec_ctrls *ctrls; + const struct v4l2_ctrl_h264_decode_params *decode; + const struct v4l2_ctrl_h264_slice_params *slices; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pps *pps; + const u8 *b0_reflist, *b1_reflist, *p_reflist; + dma_addr_t addr; + size_t offset = 0; u32 reg; - /* Decoder control register 0. */ - reg = G1_REG_DEC_CTRL0_DEC_AXI_WR_ID(0x0); - if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) - reg |= G1_REG_DEC_CTRL0_SEQ_MBAFF_E; - if (sps->profile_idc > 66) { - reg |= G1_REG_DEC_CTRL0_PICORD_COUNT_E; - if (dec_param->nal_ref_idc) - reg |= G1_REG_DEC_CTRL0_WRITE_MVS_E; - } + /* Prepare the H264 decoder context. */ + if (hantro_h264_dec_prepare_run(ctx)) + return; - if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) && - (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD || - slices[0].flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)) - reg |= G1_REG_DEC_CTRL0_PIC_INTERLACE_E; - if (slices[0].flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) - reg |= G1_REG_DEC_CTRL0_PIC_FIELDMODE_E; - if (!(slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)) - reg |= G1_REG_DEC_CTRL0_PIC_TOPFIELD_E; - vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL0); - - /* Decoder control register 1. */ - reg = G1_REG_DEC_CTRL1_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) | - G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) | - G1_REG_DEC_CTRL1_REF_FRAMES(sps->max_num_ref_frames); - vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL1); - - /* Decoder control register 2. */ - reg = G1_REG_DEC_CTRL2_CH_QP_OFFSET(pps->chroma_qp_index_offset) | - G1_REG_DEC_CTRL2_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset); - - /* always use the matrix sent from userspace */ - reg |= G1_REG_DEC_CTRL2_TYPE1_QUANT_E; - - if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)) - reg |= G1_REG_DEC_CTRL2_FIELDPIC_FLAG_E; - vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL2); - - /* Decoder control register 3. */ - reg = G1_REG_DEC_CTRL3_START_CODE_E | - G1_REG_DEC_CTRL3_INIT_QP(pps->pic_init_qp_minus26 + 26) | - G1_REG_DEC_CTRL3_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0)); - vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL3); - - /* Decoder control register 4. */ - reg = G1_REG_DEC_CTRL4_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) | - G1_REG_DEC_CTRL4_FRAMENUM(slices[0].frame_num) | - G1_REG_DEC_CTRL4_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc); - if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) - reg |= G1_REG_DEC_CTRL4_CABAC_E; - if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) - reg |= G1_REG_DEC_CTRL4_DIR_8X8_INFER_E; - if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0) - reg |= G1_REG_DEC_CTRL4_BLACKWHITE_E; - if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) - reg |= G1_REG_DEC_CTRL4_WEIGHT_PRED_E; - vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL4); - - /* Decoder control register 5. */ - reg = G1_REG_DEC_CTRL5_REFPIC_MK_LEN(slices[0].dec_ref_pic_marking_bit_size) | - G1_REG_DEC_CTRL5_IDR_PIC_ID(slices[0].idr_pic_id); - if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) - reg |= G1_REG_DEC_CTRL5_CONST_INTRA_E; - if (pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT) - reg |= G1_REG_DEC_CTRL5_FILT_CTRL_PRES; - if (pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT) - reg |= G1_REG_DEC_CTRL5_RDPIC_CNT_PRES; - if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) - reg |= G1_REG_DEC_CTRL5_8X8TRANS_FLAG_E; - if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) - reg |= G1_REG_DEC_CTRL5_IDR_PIC_E; - vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL5); - - /* Decoder control register 6. */ - reg = G1_REG_DEC_CTRL6_PPS_ID(slices[0].pic_parameter_set_id) | - G1_REG_DEC_CTRL6_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) | - G1_REG_DEC_CTRL6_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) | - G1_REG_DEC_CTRL6_POC_LENGTH(slices[0].pic_order_cnt_bit_size); - vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL6); - - /* Error concealment register. */ - vdpu_write_relaxed(vpu, 0, G1_REG_ERR_CONC); - - /* Prediction filter tap register. */ - vdpu_write_relaxed(vpu, - G1_REG_PRED_FLT_PRED_BC_TAP_0_0(1) | - G1_REG_PRED_FLT_PRED_BC_TAP_0_1(-5 & 0x3ff) | - G1_REG_PRED_FLT_PRED_BC_TAP_0_2(20), - G1_REG_PRED_FLT); - - /* Reference picture buffer control register. */ - vdpu_write_relaxed(vpu, 0, G1_REG_REF_BUF_CTRL); - - /* Reference picture buffer control register 2. */ - vdpu_write_relaxed(vpu, G1_REG_REF_BUF_CTRL2_APF_THRESHOLD(8), - G1_REG_REF_BUF_CTRL2); -} + src_buf = hantro_get_src_buf(ctx); + dst_buf = hantro_get_dst_buf(ctx); -static void set_ref(struct hantro_ctx *ctx) -{ - struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; - const u8 *b0_reflist, *b1_reflist, *p_reflist; - struct hantro_dev *vpu = ctx->dev; - u32 dpb_longterm = 0; - u32 dpb_valid = 0; - int reg_num; - u32 reg; - int i; - - /* - * Set up bit maps of valid and long term DPBs. - * NOTE: The bits are reversed, i.e. MSb is DPB 0. - */ - for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) { - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) - dpb_valid |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); - - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - dpb_longterm |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); - } - vdpu_write_relaxed(vpu, dpb_valid << 16, G1_REG_VALID_REF); - vdpu_write_relaxed(vpu, dpb_longterm << 16, G1_REG_LT_REF); - - /* - * Set up reference frame picture numbers. - * - * Each G1_REG_REF_PIC(x) register contains numbers of two - * subsequential reference pictures. - */ - for (i = 0; i < HANTRO_H264_DPB_SIZE; i += 2) { - reg = 0; - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - reg |= G1_REG_REF_PIC_REFER0_NBR(dpb[i].pic_num); - else - reg |= G1_REG_REF_PIC_REFER0_NBR(dpb[i].frame_num); - - if (dpb[i + 1].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - reg |= G1_REG_REF_PIC_REFER1_NBR(dpb[i + 1].pic_num); - else - reg |= G1_REG_REF_PIC_REFER1_NBR(dpb[i + 1].frame_num); - - vdpu_write_relaxed(vpu, reg, G1_REG_REF_PIC(i / 2)); - } + ctrls = &ctx->h264_dec.ctrls; + decode = ctrls->decode; + slices = ctrls->slices; + sps = ctrls->sps; + pps = ctrls->pps; b0_reflist = ctx->h264_dec.reflists.b0; b1_reflist = ctx->h264_dec.reflists.b1; p_reflist = ctx->h264_dec.reflists.p; - /* - * Each G1_REG_BD_REF_PIC(x) register contains three entries - * of each forward and backward picture list. - */ - reg_num = 0; - for (i = 0; i < 15; i += 3) { - reg = G1_REG_BD_REF_PIC_BINIT_RLIST_F0(b0_reflist[i]) | - G1_REG_BD_REF_PIC_BINIT_RLIST_F1(b0_reflist[i + 1]) | - G1_REG_BD_REF_PIC_BINIT_RLIST_F2(b0_reflist[i + 2]) | - G1_REG_BD_REF_PIC_BINIT_RLIST_B0(b1_reflist[i]) | - G1_REG_BD_REF_PIC_BINIT_RLIST_B1(b1_reflist[i + 1]) | - G1_REG_BD_REF_PIC_BINIT_RLIST_B2(b1_reflist[i + 2]); - vdpu_write_relaxed(vpu, reg, G1_REG_BD_REF_PIC(reg_num++)); - } - - /* - * G1_REG_BD_P_REF_PIC register contains last entries (index 15) - * of forward and backward reference picture lists and first 4 entries - * of P forward picture list. - */ - reg = G1_REG_BD_P_REF_PIC_BINIT_RLIST_F15(b0_reflist[15]) | - G1_REG_BD_P_REF_PIC_BINIT_RLIST_B15(b1_reflist[15]) | - G1_REG_BD_P_REF_PIC_PINIT_RLIST_F0(p_reflist[0]) | - G1_REG_BD_P_REF_PIC_PINIT_RLIST_F1(p_reflist[1]) | - G1_REG_BD_P_REF_PIC_PINIT_RLIST_F2(p_reflist[2]) | - G1_REG_BD_P_REF_PIC_PINIT_RLIST_F3(p_reflist[3]); - vdpu_write_relaxed(vpu, reg, G1_REG_BD_P_REF_PIC); - - /* - * Each G1_REG_FWD_PIC(x) register contains six consecutive - * entries of P forward picture list, starting from index 4. - */ - reg_num = 0; - for (i = 4; i < HANTRO_H264_DPB_SIZE; i += 6) { - reg = G1_REG_FWD_PIC_PINIT_RLIST_F0(p_reflist[i]) | - G1_REG_FWD_PIC_PINIT_RLIST_F1(p_reflist[i + 1]) | - G1_REG_FWD_PIC_PINIT_RLIST_F2(p_reflist[i + 2]) | - G1_REG_FWD_PIC_PINIT_RLIST_F3(p_reflist[i + 3]) | - G1_REG_FWD_PIC_PINIT_RLIST_F4(p_reflist[i + 4]) | - G1_REG_FWD_PIC_PINIT_RLIST_F5(p_reflist[i + 5]); - vdpu_write_relaxed(vpu, reg, G1_REG_FWD_PIC(reg_num++)); - } + reg = G1_REG_DEC_AXI_RD_ID(0xff) | + G1_REG_DEC_TIMEOUT_E(1) | + G1_REG_DEC_STRSWAP32_E(1) | + G1_REG_DEC_STRENDIAN_E(1) | + G1_REG_DEC_INSWAP32_E(1) | + G1_REG_DEC_OUTSWAP32_E(1) | + G1_REG_DEC_DATA_DISC_E(0) | + G1_REG_DEC_LATENCY(0) | + G1_REG_DEC_CLK_GATE_E(1) | + G1_REG_DEC_IN_ENDIAN(0) | + G1_REG_DEC_OUT_ENDIAN(1) | + G1_REG_DEC_ADV_PRE_DIS(0) | + G1_REG_DEC_SCMD_DIS(0) | + G1_REG_DEC_MAX_BURST(16); + vdpu_write_relaxed(vpu, reg, G1_SWREG(2)); + + reg = G1_REG_DEC_MODE(0) | + G1_REG_RLC_MODE_E(0) | + G1_REG_PIC_INTERLACE_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) && (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD || slices[0].flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)) | + G1_REG_PIC_FIELDMODE_E(slices[0].flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) | + G1_REG_PIC_TOPFIELD_E(!(slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)) | + G1_REG_FILTERING_DIS(0) | + G1_REG_PIC_FIXED_QUANT(0) | + G1_REG_WRITE_MVS_E(sps->profile_idc > 66 && decode->nal_ref_idc) | + G1_REG_SEQ_MBAFF_E(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) | + G1_REG_PICORD_COUNT_E(sps->profile_idc > 66) | + G1_REG_DEC_AXI_WR_ID(0); + vdpu_write_relaxed(vpu, reg, G1_SWREG(3)); + + reg = G1_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) | + G1_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) | + G1_REG_REF_FRAMES(sps->max_num_ref_frames); + vdpu_write_relaxed(vpu, reg, G1_SWREG(4)); + + reg = G1_REG_STRM_START_BIT(0) | + G1_REG_TYPE1_QUANT_E(1) | + G1_REG_CH_QP_OFFSET(pps->chroma_qp_index_offset) | + G1_REG_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset) | + G1_REG_FIELDPIC_FLAG_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(5)); + + reg = G1_REG_START_CODE_E(1) | + G1_REG_INIT_QP(pps->pic_init_qp_minus26 + 26) | + G1_REG_CH_8PIX_ILEAV_E(0) | + G1_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(6)); + + reg = G1_REG_CABAC_E(pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) | + G1_REG_BLACKWHITE_E(sps->profile_idc >= 100 && sps->chroma_format_idc == 0) | + G1_REG_DIR_8X8_INFER_E(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) | + G1_REG_WEIGHT_PRED_E(pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) | + G1_REG_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc) | + G1_REG_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) | + G1_REG_FRAMENUM(slices[0].frame_num); + vdpu_write_relaxed(vpu, reg, G1_SWREG(7)); + + reg = G1_REG_CONST_INTRA_E(pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) | + G1_REG_FILT_CTRL_PRES(pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT) | + G1_REG_RDPIC_CNT_PRES(pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT) | + G1_REG_8X8TRANS_FLAG_E(pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) | + G1_REG_REFPIC_MK_LEN(slices[0].dec_ref_pic_marking_bit_size) | + G1_REG_IDR_PIC_E(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) | + G1_REG_IDR_PIC_ID(slices[0].idr_pic_id); + vdpu_write_relaxed(vpu, reg, G1_SWREG(8)); + + reg = G1_REG_PPS_ID(slices[0].pic_parameter_set_id) | + G1_REG_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) | + G1_REG_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) | + G1_REG_POC_LENGTH(slices[0].pic_order_cnt_bit_size); + vdpu_write_relaxed(vpu, reg, G1_SWREG(9)); + + reg = G1_REG_PINIT_RLIST_F9(p_reflist[9]) | + G1_REG_PINIT_RLIST_F8(p_reflist[8]) | + G1_REG_PINIT_RLIST_F7(p_reflist[7]) | + G1_REG_PINIT_RLIST_F6(p_reflist[6]) | + G1_REG_PINIT_RLIST_F5(p_reflist[5]) | + G1_REG_PINIT_RLIST_F4(p_reflist[4]); + vdpu_write_relaxed(vpu, reg, G1_SWREG(10)); + + reg = G1_REG_PINIT_RLIST_F15(p_reflist[15]) | + G1_REG_PINIT_RLIST_F14(p_reflist[14]) | + G1_REG_PINIT_RLIST_F13(p_reflist[13]) | + G1_REG_PINIT_RLIST_F12(p_reflist[12]) | + G1_REG_PINIT_RLIST_F11(p_reflist[11]) | + G1_REG_PINIT_RLIST_F10(p_reflist[10]); + vdpu_write_relaxed(vpu, reg, G1_SWREG(11)); + + reg = G1_REG_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, 1)) | + G1_REG_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, 0)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(30)); + + reg = G1_REG_REFER3_NBR(hantro_h264_get_ref_nbr(ctx, 3)) | + G1_REG_REFER2_NBR(hantro_h264_get_ref_nbr(ctx, 2)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(31)); + + reg = G1_REG_REFER5_NBR(hantro_h264_get_ref_nbr(ctx, 5)) | + G1_REG_REFER4_NBR(hantro_h264_get_ref_nbr(ctx, 4)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(32)); + + reg = G1_REG_REFER7_NBR(hantro_h264_get_ref_nbr(ctx, 7)) | + G1_REG_REFER6_NBR(hantro_h264_get_ref_nbr(ctx, 6)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(33)); + + reg = G1_REG_REFER9_NBR(hantro_h264_get_ref_nbr(ctx, 9)) | + G1_REG_REFER8_NBR(hantro_h264_get_ref_nbr(ctx, 8)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(34)); + + reg = G1_REG_REFER11_NBR(hantro_h264_get_ref_nbr(ctx, 11)) | + G1_REG_REFER10_NBR(hantro_h264_get_ref_nbr(ctx, 10)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(35)); + + reg = G1_REG_REFER13_NBR(hantro_h264_get_ref_nbr(ctx, 13)) | + G1_REG_REFER12_NBR(hantro_h264_get_ref_nbr(ctx, 12)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(36)); + + reg = G1_REG_REFER15_NBR(hantro_h264_get_ref_nbr(ctx, 15)) | + G1_REG_REFER14_NBR(hantro_h264_get_ref_nbr(ctx, 14)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(37)); + + reg = G1_REG_REFER_LTERM_E(ctx->h264_dec.dpb_longterm); + vdpu_write_relaxed(vpu, reg, G1_SWREG(38)); + + reg = G1_REG_REFER_VALID_E(ctx->h264_dec.dpb_valid); + vdpu_write_relaxed(vpu, reg, G1_SWREG(39)); + + reg = G1_REG_BINIT_RLIST_B2(b1_reflist[2]) | + G1_REG_BINIT_RLIST_F2(b0_reflist[2]) | + G1_REG_BINIT_RLIST_B1(b1_reflist[1]) | + G1_REG_BINIT_RLIST_F1(b0_reflist[1]) | + G1_REG_BINIT_RLIST_B0(b1_reflist[0]) | + G1_REG_BINIT_RLIST_F0(b0_reflist[0]); + vdpu_write_relaxed(vpu, reg, G1_SWREG(42)); + + reg = G1_REG_BINIT_RLIST_B5(b1_reflist[5]) | + G1_REG_BINIT_RLIST_F5(b0_reflist[5]) | + G1_REG_BINIT_RLIST_B4(b1_reflist[4]) | + G1_REG_BINIT_RLIST_F4(b0_reflist[4]) | + G1_REG_BINIT_RLIST_B3(b1_reflist[3]) | + G1_REG_BINIT_RLIST_F3(b0_reflist[3]); + vdpu_write_relaxed(vpu, reg, G1_SWREG(43)); + + reg = G1_REG_BINIT_RLIST_B8(b1_reflist[8]) | + G1_REG_BINIT_RLIST_F8(b0_reflist[8]) | + G1_REG_BINIT_RLIST_B7(b1_reflist[7]) | + G1_REG_BINIT_RLIST_F7(b0_reflist[7]) | + G1_REG_BINIT_RLIST_B6(b1_reflist[6]) | + G1_REG_BINIT_RLIST_F6(b0_reflist[6]); + vdpu_write_relaxed(vpu, reg, G1_SWREG(44)); + + reg = G1_REG_BINIT_RLIST_B11(b1_reflist[11]) | + G1_REG_BINIT_RLIST_F11(b0_reflist[11]) | + G1_REG_BINIT_RLIST_B10(b1_reflist[10]) | + G1_REG_BINIT_RLIST_F10(b0_reflist[10]) | + G1_REG_BINIT_RLIST_B9(b1_reflist[9]) | + G1_REG_BINIT_RLIST_F9(b0_reflist[9]); + vdpu_write_relaxed(vpu, reg, G1_SWREG(45)); + + reg = G1_REG_BINIT_RLIST_B14(b1_reflist[14]) | + G1_REG_BINIT_RLIST_F14(b0_reflist[14]) | + G1_REG_BINIT_RLIST_B13(b1_reflist[13]) | + G1_REG_BINIT_RLIST_F13(b0_reflist[13]) | + G1_REG_BINIT_RLIST_B12(b1_reflist[12]) | + G1_REG_BINIT_RLIST_F12(b0_reflist[12]); + vdpu_write_relaxed(vpu, reg, G1_SWREG(46)); + + reg = G1_REG_PINIT_RLIST_F3(p_reflist[3]) | + G1_REG_PINIT_RLIST_F2(p_reflist[2]) | + G1_REG_PINIT_RLIST_F1(p_reflist[1]) | + G1_REG_PINIT_RLIST_F0(p_reflist[0]) | + G1_REG_BINIT_RLIST_B15(b1_reflist[15]) | + G1_REG_BINIT_RLIST_F15(b0_reflist[15]); + vdpu_write_relaxed(vpu, reg, G1_SWREG(47)); + + reg = G1_REG_STARTMB_X(0) | + G1_REG_STARTMB_Y(0); + vdpu_write_relaxed(vpu, reg, G1_SWREG(48)); + + reg = G1_REG_PRED_BC_TAP_0_0(1) | + G1_REG_PRED_BC_TAP_0_1((u32)-5) | + G1_REG_PRED_BC_TAP_0_2(20); + vdpu_write_relaxed(vpu, reg, G1_SWREG(49)); + + reg = G1_REG_REFBU_E(0); + vdpu_write_relaxed(vpu, reg, G1_SWREG(51)); + + reg = G1_REG_APF_THRESHOLD(8); + vdpu_write_relaxed(vpu, reg, G1_SWREG(55)); - /* Set up addresses of DPB buffers. */ - for (i = 0; i < HANTRO_H264_DPB_SIZE; i++) { - dma_addr_t dma_addr = hantro_h264_get_ref_buf(ctx, i); - - vdpu_write_relaxed(vpu, dma_addr, G1_REG_ADDR_REF(i)); - } -} - -static void set_buffers(struct hantro_ctx *ctx) -{ - const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; - struct vb2_v4l2_buffer *src_buf, *dst_buf; - struct hantro_dev *vpu = ctx->dev; - dma_addr_t src_dma, dst_dma; - size_t offset = 0; - - src_buf = hantro_get_src_buf(ctx); - dst_buf = hantro_get_dst_buf(ctx); + /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */ + vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, G1_REG_QTABLE_BASE); /* Source (stream) buffer. */ - src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); - vdpu_write_relaxed(vpu, src_dma, G1_REG_ADDR_STR); + addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + vdpu_write_relaxed(vpu, addr, G1_REG_RLC_VLC_BASE); /* Destination (decoded frame) buffer. */ - dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf); + addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); /* Adjust dma addr to start at second line for bottom field */ - if (ctrls->slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD) + if (slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD) offset = ALIGN(ctx->src_fmt.width, MB_DIM); - vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DST); + vdpu_write_relaxed(vpu, addr + offset, G1_REG_DEC_OUT_BASE); /* Higher profiles require DMV buffer appended to reference frames. */ - if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) { + if (sps->profile_idc > 66 && decode->nal_ref_idc) { unsigned int bytes_per_mb = 384; /* DMV buffer for monochrome start directly after Y-plane */ - if (ctrls->sps->profile_idc >= 100 && - ctrls->sps->chroma_format_idc == 0) + if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0) bytes_per_mb = 256; offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) * MB_HEIGHT(ctx->src_fmt.height); @@ -265,42 +447,32 @@ static void set_buffers(struct hantro_ctx *ctx) * DMV buffer is split in two for field encoded frames, * adjust offset for bottom field */ - if (ctrls->slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD) + if (slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD) offset += 32 * MB_WIDTH(ctx->src_fmt.width) * MB_HEIGHT(ctx->src_fmt.height); - vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DIR_MV); + vdpu_write_relaxed(vpu, addr + offset, G1_REG_DIR_MV_BASE); } - /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */ - vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, G1_REG_ADDR_QTABLE); -} - -void hantro_g1_h264_dec_run(struct hantro_ctx *ctx) -{ - struct hantro_dev *vpu = ctx->dev; - - /* Prepare the H264 decoder context. */ - if (hantro_h264_dec_prepare_run(ctx)) - return; - - /* Configure hardware registers. */ - set_params(ctx); - set_ref(ctx); - set_buffers(ctx); - - hantro_end_prepare_run(ctx); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 0), G1_REG_REFER0_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 1), G1_REG_REFER1_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 2), G1_REG_REFER2_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 3), G1_REG_REFER3_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 4), G1_REG_REFER4_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 5), G1_REG_REFER5_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 6), G1_REG_REFER6_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 7), G1_REG_REFER7_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 8), G1_REG_REFER8_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 9), G1_REG_REFER9_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 10), G1_REG_REFER10_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 11), G1_REG_REFER11_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 12), G1_REG_REFER12_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 13), G1_REG_REFER13_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 14), G1_REG_REFER14_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 15), G1_REG_REFER15_BASE); + + hantro_finish_run(ctx); /* Start decoding! */ - vdpu_write_relaxed(vpu, - G1_REG_CONFIG_DEC_AXI_RD_ID(0xffu) | - G1_REG_CONFIG_DEC_TIMEOUT_E | - G1_REG_CONFIG_DEC_OUT_ENDIAN | - G1_REG_CONFIG_DEC_STRENDIAN_E | - G1_REG_CONFIG_DEC_MAX_BURST(16) | - G1_REG_CONFIG_DEC_OUTSWAP32_E | - G1_REG_CONFIG_DEC_INSWAP32_E | - G1_REG_CONFIG_DEC_STRSWAP32_E | - G1_REG_CONFIG_DEC_CLK_GATE_E, - G1_REG_CONFIG); - vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT); + reg = G1_REG_DEC_E(1); + vdpu_write(vpu, reg, G1_SWREG(1)); } diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c index 24041849384a4..c20d74d4a55e3 100644 --- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c @@ -121,11 +121,11 @@ hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx, vdpu_write_relaxed(vpu, addr, G1_REG_RLC_VLC_BASE); /* Destination frame buffer */ - addr = hantro_get_dec_buf_addr(ctx, dst_buf); + addr = vb2_dma_contig_plane_dma_addr(dst_buf, 0); current_addr = addr; if (picture->picture_structure == PICT_BOTTOM_FIELD) - addr += ALIGN(ctx->dst_fmt.width, 16); + addr += ALIGN(ctx->src_fmt.width, MB_DIM); vdpu_write_relaxed(vpu, addr, G1_REG_DEC_OUT_BASE); if (!forward_addr) @@ -168,7 +168,7 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx) dst_buf = hantro_get_dst_buf(ctx); /* Apply request controls if any */ - hantro_start_prepare_run(ctx); + hantro_prepare_run(ctx); slice_params = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS); @@ -204,8 +204,8 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx) G1_REG_DEC_AXI_WR_ID(0); vdpu_write_relaxed(vpu, reg, G1_SWREG(3)); - reg = G1_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->dst_fmt.width)) | - G1_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->dst_fmt.height)) | + reg = G1_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) | + G1_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) | G1_REG_ALT_SCAN_E(picture->alternate_scan) | G1_REG_TOPFIELDFIRST_E(picture->top_field_first); vdpu_write_relaxed(vpu, reg, G1_SWREG(4)); @@ -244,7 +244,7 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx) &dst_buf->vb2_buf, sequence, picture, slice_params); - hantro_end_prepare_run(ctx); + hantro_finish_run(ctx); reg = G1_REG_DEC_E(1); vdpu_write(vpu, reg, G1_SWREG(1)); diff --git a/drivers/staging/media/hantro/hantro_g1_regs.h b/drivers/staging/media/hantro/hantro_g1_regs.h index c1756e3d53918..5c0ea79943360 100644 --- a/drivers/staging/media/hantro/hantro_g1_regs.h +++ b/drivers/staging/media/hantro/hantro_g1_regs.h @@ -9,8 +9,6 @@ #ifndef HANTRO_G1_REGS_H_ #define HANTRO_G1_REGS_H_ -#define G1_SWREG(nr) ((nr) * 4) - /* Decoder registers. */ #define G1_REG_INTERRUPT 0x004 #define G1_REG_INTERRUPT_DEC_PIC_INF BIT(24) @@ -300,55 +298,4 @@ #define G1_REG_REF_BUF_CTRL2_APF_THRESHOLD(x) (((x) & 0x3fff) << 0) #define G1_REG_SOFT_RESET 0x194 -/* Post-processor registers. */ -#define G1_REG_PP_INTERRUPT G1_SWREG(60) -#define G1_REG_PP_READY_IRQ BIT(12) -#define G1_REG_PP_IRQ BIT(8) -#define G1_REG_PP_IRQ_DIS BIT(4) -#define G1_REG_PP_PIPELINE_EN BIT(1) -#define G1_REG_PP_EXTERNAL_TRIGGER BIT(0) -#define G1_REG_PP_DEV_CONFIG G1_SWREG(61) -#define G1_REG_PP_AXI_RD_ID(v) (((v) << 24) & GENMASK(31, 24)) -#define G1_REG_PP_AXI_WR_ID(v) (((v) << 16) & GENMASK(23, 16)) -#define G1_REG_PP_INSWAP32_E(v) ((v) ? BIT(10) : 0) -#define G1_REG_PP_DATA_DISC_E(v) ((v) ? BIT(9) : 0) -#define G1_REG_PP_CLK_GATE_E(v) ((v) ? BIT(8) : 0) -#define G1_REG_PP_IN_ENDIAN(v) ((v) ? BIT(7) : 0) -#define G1_REG_PP_OUT_ENDIAN(v) ((v) ? BIT(6) : 0) -#define G1_REG_PP_OUTSWAP32_E(v) ((v) ? BIT(5) : 0) -#define G1_REG_PP_MAX_BURST(v) (((v) << 0) & GENMASK(4, 0)) -#define G1_REG_PP_IN_LUMA_BASE G1_SWREG(63) -#define G1_REG_PP_IN_CB_BASE G1_SWREG(64) -#define G1_REG_PP_IN_CR_BASE G1_SWREG(65) -#define G1_REG_PP_OUT_LUMA_BASE G1_SWREG(66) -#define G1_REG_PP_OUT_CHROMA_BASE G1_SWREG(67) -#define G1_REG_PP_CONTRAST_ADJUST G1_SWREG(68) -#define G1_REG_PP_COLOR_CONVERSION G1_SWREG(69) -#define G1_REG_PP_COLOR_CONVERSION0 G1_SWREG(70) -#define G1_REG_PP_COLOR_CONVERSION1 G1_SWREG(71) -#define G1_REG_PP_INPUT_SIZE G1_SWREG(72) -#define G1_REG_PP_INPUT_SIZE_HEIGHT(v) (((v) << 9) & GENMASK(16, 9)) -#define G1_REG_PP_INPUT_SIZE_WIDTH(v) (((v) << 0) & GENMASK(8, 0)) -#define G1_REG_PP_SCALING0 G1_SWREG(79) -#define G1_REG_PP_PADD_R(v) (((v) << 23) & GENMASK(27, 23)) -#define G1_REG_PP_PADD_G(v) (((v) << 18) & GENMASK(22, 18)) -#define G1_REG_PP_RANGEMAP_Y(v) ((v) ? BIT(31) : 0) -#define G1_REG_PP_RANGEMAP_C(v) ((v) ? BIT(30) : 0) -#define G1_REG_PP_YCBCR_RANGE(v) ((v) ? BIT(29) : 0) -#define G1_REG_PP_RGB_16(v) ((v) ? BIT(28) : 0) -#define G1_REG_PP_SCALING1 G1_SWREG(80) -#define G1_REG_PP_PADD_B(v) (((v) << 18) & GENMASK(22, 18)) -#define G1_REG_PP_MASK_R G1_SWREG(82) -#define G1_REG_PP_MASK_G G1_SWREG(83) -#define G1_REG_PP_MASK_B G1_SWREG(84) -#define G1_REG_PP_CONTROL G1_SWREG(85) -#define G1_REG_PP_CONTROL_IN_FMT(v) (((v) << 29) & GENMASK(31, 29)) -#define G1_REG_PP_CONTROL_OUT_FMT(v) (((v) << 26) & GENMASK(28, 26)) -#define G1_REG_PP_CONTROL_OUT_HEIGHT(v) (((v) << 15) & GENMASK(25, 15)) -#define G1_REG_PP_CONTROL_OUT_WIDTH(v) (((v) << 4) & GENMASK(14, 4)) -#define G1_REG_PP_MASK1_ORIG_WIDTH G1_SWREG(88) -#define G1_REG_PP_ORIG_WIDTH(v) (((v) << 23) & GENMASK(31, 23)) -#define G1_REG_PP_DISPLAY_WIDTH G1_SWREG(92) -#define G1_REG_PP_FUSE G1_SWREG(99) - #endif /* HANTRO_G1_REGS_H_ */ diff --git a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c index a5cdf150cd16c..5acc1b14f734d 100644 --- a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c +++ b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c @@ -422,7 +422,7 @@ static void cfg_buffers(struct hantro_ctx *ctx, } vdpu_write_relaxed(vpu, reg, G1_REG_FWD_PIC(0)); - dst_dma = hantro_get_dec_buf_addr(ctx, &vb2_dst->vb2_buf); + dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); vdpu_write_relaxed(vpu, dst_dma, G1_REG_ADDR_DST); } @@ -430,12 +430,12 @@ void hantro_g1_vp8_dec_run(struct hantro_ctx *ctx) { const struct v4l2_ctrl_vp8_frame_header *hdr; struct hantro_dev *vpu = ctx->dev; - size_t height = ctx->dst_fmt.height; - size_t width = ctx->dst_fmt.width; + size_t height = ctx->src_fmt.height; + size_t width = ctx->src_fmt.width; u32 mb_width, mb_height; u32 reg; - hantro_start_prepare_run(ctx); + hantro_prepare_run(ctx); hdr = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER); if (WARN_ON(!hdr)) @@ -496,7 +496,7 @@ void hantro_g1_vp8_dec_run(struct hantro_ctx *ctx) cfg_ref(ctx, hdr); cfg_buffers(ctx, hdr); - hantro_end_prepare_run(ctx); + hantro_finish_run(ctx); vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT); } diff --git a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c index 0d8afc3e5d711..938b48d4d3d90 100644 --- a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c +++ b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c @@ -87,7 +87,7 @@ void hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx) src_buf = hantro_get_src_buf(ctx); dst_buf = hantro_get_dst_buf(ctx); - hantro_start_prepare_run(ctx); + hantro_prepare_run(ctx); memset(&jpeg_ctx, 0, sizeof(jpeg_ctx)); jpeg_ctx.buffer = vb2_plane_vaddr(&dst_buf->vb2_buf, 0); @@ -122,7 +122,7 @@ void hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx) | H1_REG_ENC_PIC_INTRA | H1_REG_ENC_CTRL_EN_BIT; - hantro_end_prepare_run(ctx); + hantro_finish_run(ctx); vepu_write(vpu, reg, H1_REG_ENC_CTRL); } diff --git a/drivers/staging/media/hantro/hantro_h264.c b/drivers/staging/media/hantro/hantro_h264.c index f2d3e81fb6ce8..f82f0744cfd0e 100644 --- a/drivers/staging/media/hantro/hantro_h264.c +++ b/drivers/staging/media/hantro/hantro_h264.c @@ -225,17 +225,65 @@ static void prepare_table(struct hantro_ctx *ctx) { const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; + const struct v4l2_ctrl_h264_slice_params *slices = ctrls->slices; struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu; const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; + u32 dpb_longterm = 0; + u32 dpb_valid = 0; int i; + /* + * Set up bit maps of valid and long term DPBs. + * NOTE: The bits are reversed, i.e. MSb is DPB 0. + */ + if ((slices[0].flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) || (slices[0].flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)) { + for (i = 0; i < HANTRO_H264_DPB_SIZE * 2; ++i) { + // check for correct reference use + u32 flag = (i & 0x1) ? V4L2_H264_DPB_ENTRY_FLAG_REF_BOTTOM : V4L2_H264_DPB_ENTRY_FLAG_REF_TOP; + if (dpb[i / 2].flags & flag) + dpb_valid |= BIT(HANTRO_H264_DPB_SIZE * 2 - 1 - i); + + if (dpb[i / 2].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + dpb_longterm |= BIT(HANTRO_H264_DPB_SIZE * 2 - 1 - i); + } + + ctx->h264_dec.dpb_valid = dpb_valid; + ctx->h264_dec.dpb_longterm = dpb_longterm; + } else { + for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) { + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) + dpb_valid |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); + + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + dpb_longterm |= BIT(HANTRO_H264_DPB_SIZE - 1 - i); + } + + ctx->h264_dec.dpb_valid = dpb_valid << 16; + ctx->h264_dec.dpb_longterm = dpb_longterm << 16; + } + for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) { - tbl->poc[i * 2] = dpb[i].top_field_order_cnt; - tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt; + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) { + tbl->poc[i * 2] = dpb[i].top_field_order_cnt; + tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt; + } else { + tbl->poc[i * 2] = 0; + tbl->poc[i * 2 + 1] = 0; + } } - tbl->poc[32] = dec_param->top_field_order_cnt; - tbl->poc[33] = dec_param->bottom_field_order_cnt; + if ((slices[0].flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) || !(slices[0].flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)) { + if ((slices[0].flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)) + tbl->poc[32] = (slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD) ? + dec_param->bottom_field_order_cnt : + dec_param->top_field_order_cnt; + else + tbl->poc[32] = min(dec_param->top_field_order_cnt, dec_param->bottom_field_order_cnt); + tbl->poc[33] = 0; + } else { + tbl->poc[32] = dec_param->top_field_order_cnt; + tbl->poc[33] = dec_param->bottom_field_order_cnt; + } reorder_scaling_list(ctx); } @@ -249,21 +297,6 @@ struct hantro_h264_reflist_builder { u8 num_valid; }; -static s32 get_poc(enum v4l2_field field, s32 top_field_order_cnt, - s32 bottom_field_order_cnt) -{ - switch (field) { - case V4L2_FIELD_TOP: - return top_field_order_cnt; - case V4L2_FIELD_BOTTOM: - return bottom_field_order_cnt; - default: - break; - } - - return min(top_field_order_cnt, bottom_field_order_cnt); -} - static void init_reflist_builder(struct hantro_ctx *ctx, struct hantro_h264_reflist_builder *b) @@ -271,9 +304,7 @@ init_reflist_builder(struct hantro_ctx *ctx, const struct v4l2_ctrl_h264_slice_params *slice_params; const struct v4l2_ctrl_h264_decode_params *dec_param; const struct v4l2_ctrl_h264_sps *sps; - struct vb2_v4l2_buffer *buf = hantro_get_dst_buf(ctx); const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; - struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q; int cur_frame_num, max_frame_num; unsigned int i; @@ -285,21 +316,15 @@ init_reflist_builder(struct hantro_ctx *ctx, memset(b, 0, sizeof(*b)); b->dpb = dpb; - b->curpoc = get_poc(buf->field, dec_param->top_field_order_cnt, - dec_param->bottom_field_order_cnt); + b->curpoc = (slice_params->flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD) ? + dec_param->bottom_field_order_cnt : + dec_param->top_field_order_cnt; for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++) { - int buf_idx; - - if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + u32 ref_flag = dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_REF_FRAME; + if (!ref_flag) continue; - buf_idx = vb2_find_timestamp(cap_q, dpb[i].reference_ts, 0); - if (buf_idx < 0) - continue; - - buf = to_vb2_v4l2_buffer(vb2_get_buffer(cap_q, buf_idx)); - /* * Handle frame_num wraparound as described in section * '8.2.4.1 Decoding process for picture numbers' of the spec. @@ -311,8 +336,13 @@ init_reflist_builder(struct hantro_ctx *ctx, else b->frame_nums[i] = dpb[i].frame_num; - b->pocs[i] = get_poc(buf->field, dpb[i].top_field_order_cnt, - dpb[i].bottom_field_order_cnt); + if (ref_flag == V4L2_H264_DPB_ENTRY_FLAG_REF_FRAME) + b->pocs[i] = min(dpb[i].bottom_field_order_cnt, dpb[i].top_field_order_cnt); + else if (ref_flag == V4L2_H264_DPB_ENTRY_FLAG_REF_BOTTOM) + b->pocs[i] = dpb[i].bottom_field_order_cnt; + else if (ref_flag == V4L2_H264_DPB_ENTRY_FLAG_REF_TOP) + b->pocs[i] = dpb[i].top_field_order_cnt; + b->unordered_reflist[b->num_valid] = i; b->num_valid++; } @@ -466,8 +496,7 @@ build_b_ref_lists(const struct hantro_h264_reflist_builder *builder, static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a, const struct v4l2_h264_dpb_entry *b) { - return a->top_field_order_cnt == b->top_field_order_cnt && - a->bottom_field_order_cnt == b->bottom_field_order_cnt; + return a->reference_ts == b->reference_ts; } static void update_dpb(struct hantro_ctx *ctx) @@ -481,13 +510,13 @@ static void update_dpb(struct hantro_ctx *ctx) /* Disable all entries by default. */ for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++) - ctx->h264_dec.dpb[i].flags &= ~V4L2_H264_DPB_ENTRY_FLAG_ACTIVE; + ctx->h264_dec.dpb[i].flags = 0; /* Try to match new DPB entries with existing ones by their POCs. */ for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) { const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i]; - if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID)) continue; /* @@ -498,8 +527,7 @@ static void update_dpb(struct hantro_ctx *ctx) struct v4l2_h264_dpb_entry *cdpb; cdpb = &ctx->h264_dec.dpb[j]; - if (cdpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE || - !dpb_entry_match(cdpb, ndpb)) + if (!dpb_entry_match(cdpb, ndpb)) continue; *cdpb = *ndpb; @@ -535,7 +563,11 @@ dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, unsigned int dpb_idx) { struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; + const struct v4l2_ctrl_h264_decode_params *dec_param = ctx->h264_dec.ctrls.decode; + const struct v4l2_ctrl_h264_slice_params *slices = ctx->h264_dec.ctrls.slices; dma_addr_t dma_addr = 0; + s32 cur_poc; + u32 flags; if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) dma_addr = hantro_get_ref(ctx, dpb[dpb_idx].reference_ts); @@ -553,7 +585,29 @@ dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, dma_addr = vb2_dma_contig_plane_dma_addr(buf, 0); } - return dma_addr; + cur_poc = slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD ? + dec_param->bottom_field_order_cnt : + dec_param->top_field_order_cnt; + flags = dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD_PICTURE ? 0x2 : 0; + flags |= abs(dpb[dpb_idx].top_field_order_cnt - cur_poc) < + abs(dpb[dpb_idx].bottom_field_order_cnt - cur_poc) ? + 0x1 : 0; + + return dma_addr | flags; +} + +u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, + unsigned int dpb_idx) +{ + const struct v4l2_h264_dpb_entry *dpb = &ctx->h264_dec.dpb[dpb_idx]; + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + return 0; + + if (dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + return dpb->pic_num; + + return dpb->frame_num; } int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx) @@ -562,7 +616,7 @@ int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx) struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls; struct hantro_h264_reflist_builder reflist_builder; - hantro_start_prepare_run(ctx); + hantro_prepare_run(ctx); ctrls->scaling = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX); diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index 2398d4c1f2071..8a4ada2fc8c3b 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -28,13 +28,11 @@ struct hantro_variant; * @cpu: CPU pointer to the buffer. * @dma: DMA address of the buffer. * @size: Size of the buffer. - * @attrs: Attributes of the DMA mapping. */ struct hantro_aux_buf { void *cpu; dma_addr_t dma; size_t size; - unsigned long attrs; }; /** @@ -88,6 +86,8 @@ struct hantro_h264_dec_hw_ctx { struct v4l2_h264_dpb_entry dpb[HANTRO_H264_DPB_SIZE]; struct hantro_h264_dec_reflists reflists; struct hantro_h264_dec_ctrls ctrls; + u32 dpb_longterm; + u32 dpb_valid; }; /** @@ -108,15 +108,6 @@ struct hantro_vp8_dec_hw_ctx { struct hantro_aux_buf prob_tbl; }; -/** - * struct hantro_postproc_ctx - * - * @dec_q: References buffers, in decoder format. - */ -struct hantro_postproc_ctx { - struct hantro_aux_buf dec_q[VB2_MAX_FRAME]; -}; - /** * struct hantro_codec_ops - codec mode specific operations * @@ -152,16 +143,14 @@ extern const struct hantro_variant rk3399_vpu_variant; extern const struct hantro_variant rk3328_vpu_variant; extern const struct hantro_variant rk3288_vpu_variant; -extern const struct hantro_postproc_regs hantro_g1_postproc_regs; - extern const u32 hantro_vp8_dec_mc_filter[8][6]; void hantro_watchdog(struct work_struct *work); void hantro_run(struct hantro_ctx *ctx); void hantro_irq_done(struct hantro_dev *vpu, unsigned int bytesused, enum vb2_buffer_state result); -void hantro_start_prepare_run(struct hantro_ctx *ctx); -void hantro_end_prepare_run(struct hantro_ctx *ctx); +void hantro_prepare_run(struct hantro_ctx *ctx); +void hantro_finish_run(struct hantro_ctx *ctx); void hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx); void rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx); @@ -170,8 +159,11 @@ void hantro_jpeg_enc_exit(struct hantro_ctx *ctx); dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, unsigned int dpb_idx); +u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, + unsigned int dpb_idx); int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx); void hantro_g1_h264_dec_run(struct hantro_ctx *ctx); +void rk3399_vpu_h264_dec_run(struct hantro_ctx *ctx); int hantro_h264_dec_init(struct hantro_ctx *ctx); void hantro_h264_dec_exit(struct hantro_ctx *ctx); diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c index 0198bcda26b75..1dae76f20034d 100644 --- a/drivers/staging/media/hantro/hantro_v4l2.c +++ b/drivers/staging/media/hantro/hantro_v4l2.c @@ -47,30 +47,11 @@ hantro_get_formats(const struct hantro_ctx *ctx, unsigned int *num_fmts) } static const struct hantro_fmt * -hantro_get_postproc_formats(const struct hantro_ctx *ctx, - unsigned int *num_fmts) +hantro_find_format(const struct hantro_fmt *formats, unsigned int num_fmts, + u32 fourcc) { - if (hantro_is_encoder_ctx(ctx)) { - *num_fmts = 0; - return NULL; - } - - *num_fmts = ctx->dev->variant->num_postproc_fmts; - return ctx->dev->variant->postproc_fmts; -} - -static const struct hantro_fmt * -hantro_find_format(const struct hantro_ctx *ctx, u32 fourcc) -{ - const struct hantro_fmt *formats; - unsigned int i, num_fmts; - - formats = hantro_get_formats(ctx, &num_fmts); - for (i = 0; i < num_fmts; i++) - if (formats[i].fourcc == fourcc) - return &formats[i]; + unsigned int i; - formats = hantro_get_postproc_formats(ctx, &num_fmts); for (i = 0; i < num_fmts; i++) if (formats[i].fourcc == fourcc) return &formats[i]; @@ -78,12 +59,11 @@ hantro_find_format(const struct hantro_ctx *ctx, u32 fourcc) } static const struct hantro_fmt * -hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream) +hantro_get_default_fmt(const struct hantro_fmt *formats, unsigned int num_fmts, + bool bitstream) { - const struct hantro_fmt *formats; - unsigned int i, num_fmts; + unsigned int i; - formats = hantro_get_formats(ctx, &num_fmts); for (i = 0; i < num_fmts; i++) { if (bitstream == (formats[i].codec_mode != HANTRO_MODE_NONE)) @@ -109,7 +89,8 @@ static int vidioc_enum_framesizes(struct file *file, void *priv, struct v4l2_frmsizeenum *fsize) { struct hantro_ctx *ctx = fh_to_ctx(priv); - const struct hantro_fmt *fmt; + const struct hantro_fmt *formats, *fmt; + unsigned int num_fmts; if (fsize->index != 0) { vpu_debug(0, "invalid frame size index (expected 0, got %d)\n", @@ -117,7 +98,8 @@ static int vidioc_enum_framesizes(struct file *file, void *priv, return -EINVAL; } - fmt = hantro_find_format(ctx, fsize->pixel_format); + formats = hantro_get_formats(ctx, &num_fmts); + fmt = hantro_find_format(formats, num_fmts, fsize->pixel_format); if (!fmt) { vpu_debug(0, "unsupported bitstream format (%08x)\n", fsize->pixel_format); @@ -168,24 +150,6 @@ static int vidioc_enum_fmt(struct file *file, void *priv, } ++j; } - - /* - * Enumerate post-processed formats. As per the specification, - * we enumerated these formats after natively decoded formats - * as a hint for applications on what's the preferred fomat. - */ - if (!capture) - return -EINVAL; - formats = hantro_get_postproc_formats(ctx, &num_fmts); - for (i = 0; i < num_fmts; i++) { - if (j == f->index) { - fmt = &formats[i]; - f->pixelformat = fmt->fourcc; - return 0; - } - ++j; - } - return -EINVAL; } @@ -232,7 +196,8 @@ static int vidioc_try_fmt(struct file *file, void *priv, struct v4l2_format *f, { struct hantro_ctx *ctx = fh_to_ctx(priv); struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp; - const struct hantro_fmt *fmt, *vpu_fmt; + const struct hantro_fmt *formats, *fmt, *vpu_fmt; + unsigned int num_fmts; bool coded; coded = capture == hantro_is_encoder_ctx(ctx); @@ -243,9 +208,10 @@ static int vidioc_try_fmt(struct file *file, void *priv, struct v4l2_format *f, (pix_mp->pixelformat >> 16) & 0x7f, (pix_mp->pixelformat >> 24) & 0x7f); - fmt = hantro_find_format(ctx, pix_mp->pixelformat); + formats = hantro_get_formats(ctx, &num_fmts); + fmt = hantro_find_format(formats, num_fmts, pix_mp->pixelformat); if (!fmt) { - fmt = hantro_get_default_fmt(ctx, coded); + fmt = hantro_get_default_fmt(formats, num_fmts, coded); f->fmt.pix_mp.pixelformat = fmt->fourcc; } @@ -280,7 +246,7 @@ static int vidioc_try_fmt(struct file *file, void *priv, struct v4l2_format *f, * * The H264 decoder needs extra space on the output buffers * to store motion vectors. This is needed for reference - * frames and only if the format is non-post-processed NV12. + * frames. * * Memory layout is as follow: * @@ -294,8 +260,7 @@ static int vidioc_try_fmt(struct file *file, void *priv, struct v4l2_format *f, * | MC sync 32 bytes | * +---------------------------+ */ - if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE && - !hantro_needs_postproc(ctx, ctx->vpu_dst_fmt)) + if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE) pix_mp->plane_fmt[0].sizeimage += 64 * MB_WIDTH(pix_mp->width) * MB_WIDTH(pix_mp->height) + 32; @@ -341,10 +306,12 @@ hantro_reset_fmt(struct v4l2_pix_format_mplane *fmt, static void hantro_reset_encoded_fmt(struct hantro_ctx *ctx) { - const struct hantro_fmt *vpu_fmt; + const struct hantro_fmt *vpu_fmt, *formats; struct v4l2_pix_format_mplane *fmt; + unsigned int num_fmts; - vpu_fmt = hantro_get_default_fmt(ctx, true); + formats = hantro_get_formats(ctx, &num_fmts); + vpu_fmt = hantro_get_default_fmt(formats, num_fmts, true); if (hantro_is_encoder_ctx(ctx)) { ctx->vpu_dst_fmt = vpu_fmt; @@ -365,10 +332,12 @@ hantro_reset_encoded_fmt(struct hantro_ctx *ctx) static void hantro_reset_raw_fmt(struct hantro_ctx *ctx) { - const struct hantro_fmt *raw_vpu_fmt; + const struct hantro_fmt *raw_vpu_fmt, *formats; struct v4l2_pix_format_mplane *raw_fmt, *encoded_fmt; + unsigned int num_fmts; - raw_vpu_fmt = hantro_get_default_fmt(ctx, false); + formats = hantro_get_formats(ctx, &num_fmts); + raw_vpu_fmt = hantro_get_default_fmt(formats, num_fmts, false); if (hantro_is_encoder_ctx(ctx)) { ctx->vpu_src_fmt = raw_vpu_fmt; @@ -415,6 +384,8 @@ vidioc_s_fmt_out_mplane(struct file *file, void *priv, struct v4l2_format *f) struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp; struct hantro_ctx *ctx = fh_to_ctx(priv); struct vb2_queue *vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); + const struct hantro_fmt *formats; + unsigned int num_fmts; int ret; ret = vidioc_try_fmt_out_mplane(file, priv, f); @@ -450,7 +421,9 @@ vidioc_s_fmt_out_mplane(struct file *file, void *priv, struct v4l2_format *f) return -EBUSY; } - ctx->vpu_src_fmt = hantro_find_format(ctx, pix_mp->pixelformat); + formats = hantro_get_formats(ctx, &num_fmts); + ctx->vpu_src_fmt = hantro_find_format(formats, num_fmts, + pix_mp->pixelformat); ctx->src_fmt = *pix_mp; /* @@ -484,7 +457,9 @@ static int vidioc_s_fmt_cap_mplane(struct file *file, void *priv, { struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp; struct hantro_ctx *ctx = fh_to_ctx(priv); + const struct hantro_fmt *formats; struct vb2_queue *vq; + unsigned int num_fmts; int ret; /* Change not allowed if queue is busy. */ @@ -513,7 +488,9 @@ static int vidioc_s_fmt_cap_mplane(struct file *file, void *priv, if (ret) return ret; - ctx->vpu_dst_fmt = hantro_find_format(ctx, pix_mp->pixelformat); + formats = hantro_get_formats(ctx, &num_fmts); + ctx->vpu_dst_fmt = hantro_find_format(formats, num_fmts, + pix_mp->pixelformat); ctx->dst_fmt = *pix_mp; /* @@ -673,23 +650,10 @@ static int hantro_start_streaming(struct vb2_queue *q, unsigned int count) vpu_debug(4, "Codec mode = %d\n", codec_mode); ctx->codec_ops = &ctx->dev->variant->codec_ops[codec_mode]; - if (ctx->codec_ops->init) { + if (ctx->codec_ops->init) ret = ctx->codec_ops->init(ctx); - if (ret) - return ret; - } - - if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt)) { - ret = hantro_postproc_alloc(ctx); - if (ret) - goto err_codec_exit; - } } - return ret; -err_codec_exit: - if (ctx->codec_ops->exit) - ctx->codec_ops->exit(ctx); return ret; } @@ -716,7 +680,6 @@ static void hantro_stop_streaming(struct vb2_queue *q) struct hantro_ctx *ctx = vb2_get_drv_priv(q); if (hantro_vq_is_coded(q)) { - hantro_postproc_free(ctx); if (ctx->codec_ops && ctx->codec_ops->exit) ctx->codec_ops->exit(ctx); } diff --git a/drivers/staging/media/hantro/hantro_vp8.c b/drivers/staging/media/hantro/hantro_vp8.c index 0e02d147b189d..e010c9088fdeb 100644 --- a/drivers/staging/media/hantro/hantro_vp8.c +++ b/drivers/staging/media/hantro/hantro_vp8.c @@ -151,8 +151,8 @@ int hantro_vp8_dec_init(struct hantro_ctx *ctx) int ret; /* segment map table size calculation */ - mb_width = DIV_ROUND_UP(ctx->dst_fmt.width, 16); - mb_height = DIV_ROUND_UP(ctx->dst_fmt.height, 16); + mb_width = MB_WIDTH(ctx->src_fmt.width); + mb_height = MB_HEIGHT(ctx->src_fmt.height); segment_map_size = round_up(DIV_ROUND_UP(mb_width * mb_height, 4), 64); /* diff --git a/drivers/staging/media/hantro/rk3288_vpu_hw.c b/drivers/staging/media/hantro/rk3288_vpu_hw.c index 2f914b37b9e56..f8db6fcaad733 100644 --- a/drivers/staging/media/hantro/rk3288_vpu_hw.c +++ b/drivers/staging/media/hantro/rk3288_vpu_hw.c @@ -56,13 +56,6 @@ static const struct hantro_fmt rk3288_vpu_enc_fmts[] = { }, }; -static const struct hantro_fmt rk3288_vpu_postproc_fmts[] = { - { - .fourcc = V4L2_PIX_FMT_YUYV, - .codec_mode = HANTRO_MODE_NONE, - }, -}; - static const struct hantro_fmt rk3288_vpu_dec_fmts[] = { { .fourcc = V4L2_PIX_FMT_NV12, @@ -222,9 +215,6 @@ const struct hantro_variant rk3288_vpu_variant = { .dec_offset = 0x400, .dec_fmts = rk3288_vpu_dec_fmts, .num_dec_fmts = ARRAY_SIZE(rk3288_vpu_dec_fmts), - .postproc_fmts = rk3288_vpu_postproc_fmts, - .num_postproc_fmts = ARRAY_SIZE(rk3288_vpu_postproc_fmts), - .postproc_regs = &hantro_g1_postproc_regs, .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | HANTRO_H264_DECODER, .codec_ops = rk3288_vpu_codec_ops, diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw.c b/drivers/staging/media/hantro/rk3399_vpu_hw.c index 9ac1f2cb6a16b..78f878ca01ff7 100644 --- a/drivers/staging/media/hantro/rk3399_vpu_hw.c +++ b/drivers/staging/media/hantro/rk3399_vpu_hw.c @@ -60,6 +60,19 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, }, + { + .fourcc = V4L2_PIX_FMT_H264_SLICE, + .codec_mode = HANTRO_MODE_H264_DEC, + .max_depth = 2, + .frmsize = { + .min_width = 48, + .max_width = 1920, + .step_width = MB_DIM, + .min_height = 48, + .max_height = 1088, + .step_height = MB_DIM, + }, + }, { .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, .codec_mode = HANTRO_MODE_MPEG2_DEC, @@ -161,6 +174,12 @@ static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = { .init = hantro_jpeg_enc_init, .exit = hantro_jpeg_enc_exit, }, + [HANTRO_MODE_H264_DEC] = { + .run = rk3399_vpu_h264_dec_run, + .reset = rk3399_vpu_dec_reset, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, [HANTRO_MODE_MPEG2_DEC] = { .run = rk3399_vpu_mpeg2_dec_run, .reset = rk3399_vpu_dec_reset, @@ -196,7 +215,7 @@ const struct hantro_variant rk3399_vpu_variant = { .dec_fmts = rk3399_vpu_dec_fmts, .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER | - HANTRO_VP8_DECODER, + HANTRO_VP8_DECODER | HANTRO_H264_DECODER, .codec_ops = rk3399_vpu_codec_ops, .irqs = rk3399_irqs, .num_irqs = ARRAY_SIZE(rk3399_irqs), @@ -213,7 +232,8 @@ const struct hantro_variant rk3328_vpu_variant = { .dec_offset = 0x400, .dec_fmts = rk3399_vpu_dec_fmts, .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), - .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER, + .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | + HANTRO_H264_DECODER, .codec_ops = rk3399_vpu_codec_ops, .irqs = rk3328_irqs, .num_irqs = ARRAY_SIZE(rk3328_irqs), diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_h264_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_h264_dec.c new file mode 100644 index 0000000000000..02a81f03eed12 --- /dev/null +++ b/drivers/staging/media/hantro/rk3399_vpu_hw_h264_dec.c @@ -0,0 +1,493 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (c) 2014 Rockchip Electronics Co., Ltd. + * Hertz Wong + * Herman Chen + * + * Copyright (C) 2014 Google, Inc. + * Tomasz Figa + */ + +#include +#include + +#include + +#include "hantro_hw.h" +#include "hantro_v4l2.h" + +#define VDPU_SWREG(nr) ((nr) * 4) + +#define VDPU_REG_DEC_OUT_BASE VDPU_SWREG(63) +#define VDPU_REG_RLC_VLC_BASE VDPU_SWREG(64) +#define VDPU_REG_QTABLE_BASE VDPU_SWREG(61) +#define VDPU_REG_DIR_MV_BASE VDPU_SWREG(62) +#define VDPU_REG_REFER0_BASE VDPU_SWREG(84) +#define VDPU_REG_REFER1_BASE VDPU_SWREG(85) +#define VDPU_REG_REFER2_BASE VDPU_SWREG(86) +#define VDPU_REG_REFER3_BASE VDPU_SWREG(87) +#define VDPU_REG_REFER4_BASE VDPU_SWREG(88) +#define VDPU_REG_REFER5_BASE VDPU_SWREG(89) +#define VDPU_REG_REFER6_BASE VDPU_SWREG(90) +#define VDPU_REG_REFER7_BASE VDPU_SWREG(91) +#define VDPU_REG_REFER8_BASE VDPU_SWREG(92) +#define VDPU_REG_REFER9_BASE VDPU_SWREG(93) +#define VDPU_REG_REFER10_BASE VDPU_SWREG(94) +#define VDPU_REG_REFER11_BASE VDPU_SWREG(95) +#define VDPU_REG_REFER12_BASE VDPU_SWREG(96) +#define VDPU_REG_REFER13_BASE VDPU_SWREG(97) +#define VDPU_REG_REFER14_BASE VDPU_SWREG(98) +#define VDPU_REG_REFER15_BASE VDPU_SWREG(99) +#define VDPU_REG_DEC_E(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_ADV_PRE_DIS(v) ((v) ? BIT(11) : 0) +#define VDPU_REG_DEC_SCMD_DIS(v) ((v) ? BIT(10) : 0) +#define VDPU_REG_FILTERING_DIS(v) ((v) ? BIT(8) : 0) +#define VDPU_REG_PIC_FIXED_QUANT(v) ((v) ? BIT(7) : 0) +#define VDPU_REG_DEC_LATENCY(v) (((v) << 1) & GENMASK(6, 1)) + +#define VDPU_REG_INIT_QP(v) (((v) << 25) & GENMASK(30, 25)) +#define VDPU_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0)) + +#define VDPU_REG_APF_THRESHOLD(v) (((v) << 17) & GENMASK(30, 17)) +#define VDPU_REG_STARTMB_X(v) (((v) << 8) & GENMASK(16, 8)) +#define VDPU_REG_STARTMB_Y(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_DEC_MODE(v) (((v) << 0) & GENMASK(3, 0)) + +#define VDPU_REG_DEC_STRENDIAN_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_STRSWAP32_E(v) ((v) ? BIT(4) : 0) +#define VDPU_REG_DEC_OUTSWAP32_E(v) ((v) ? BIT(3) : 0) +#define VDPU_REG_DEC_INSWAP32_E(v) ((v) ? BIT(2) : 0) +#define VDPU_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(1) : 0) +#define VDPU_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_DATA_DISC_E(v) ((v) ? BIT(22) : 0) +#define VDPU_REG_DEC_MAX_BURST(v) (((v) << 16) & GENMASK(20, 16)) +#define VDPU_REG_DEC_AXI_WR_ID(v) (((v) << 8) & GENMASK(15, 8)) +#define VDPU_REG_DEC_AXI_RD_ID(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_START_CODE_E(v) ((v) ? BIT(22) : 0) +#define VDPU_REG_CH_8PIX_ILEAV_E(v) ((v) ? BIT(21) : 0) +#define VDPU_REG_RLC_MODE_E(v) ((v) ? BIT(20) : 0) +#define VDPU_REG_PIC_INTERLACE_E(v) ((v) ? BIT(17) : 0) +#define VDPU_REG_PIC_FIELDMODE_E(v) ((v) ? BIT(16) : 0) +#define VDPU_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(13) : 0) +#define VDPU_REG_WRITE_MVS_E(v) ((v) ? BIT(10) : 0) +#define VDPU_REG_SEQ_MBAFF_E(v) ((v) ? BIT(7) : 0) +#define VDPU_REG_PICORD_COUNT_E(v) ((v) ? BIT(6) : 0) +#define VDPU_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(4) : 0) + +#define VDPU_REG_PRED_BC_TAP_0_0(v) (((v) << 22) & GENMASK(31, 22)) +#define VDPU_REG_PRED_BC_TAP_0_1(v) (((v) << 12) & GENMASK(21, 12)) +#define VDPU_REG_PRED_BC_TAP_0_2(v) (((v) << 2) & GENMASK(11, 2)) + +#define VDPU_REG_REFBU_E(v) ((v) ? BIT(31) : 0) + +#define VDPU_REG_PINIT_RLIST_F9(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_PINIT_RLIST_F8(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_PINIT_RLIST_F7(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_PINIT_RLIST_F6(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_PINIT_RLIST_F5(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_PINIT_RLIST_F4(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_PINIT_RLIST_F15(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_PINIT_RLIST_F14(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_PINIT_RLIST_F13(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_PINIT_RLIST_F12(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_PINIT_RLIST_F11(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_PINIT_RLIST_F10(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_REFER1_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER0_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER3_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER2_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER5_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER4_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER7_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER6_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER9_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER8_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER11_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER10_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER13_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER12_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER15_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER14_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_BINIT_RLIST_F5(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_BINIT_RLIST_F4(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_BINIT_RLIST_F3(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_F2(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_F1(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_F0(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_F11(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_BINIT_RLIST_F10(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_BINIT_RLIST_F9(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_F8(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_F7(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_F6(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_F15(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_F14(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_F13(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_F12(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_B5(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_BINIT_RLIST_B4(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_BINIT_RLIST_B3(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_B2(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_B1(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_B0(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_B11(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_BINIT_RLIST_B10(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_BINIT_RLIST_B9(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_B8(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_B7(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_B6(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_B15(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_B14(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_B13(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_B12(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_PINIT_RLIST_F3(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_PINIT_RLIST_F2(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_PINIT_RLIST_F1(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_PINIT_RLIST_F0(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_REFER_LTERM_E(v) (((v) << 0) & GENMASK(31, 0)) + +#define VDPU_REG_REFER_VALID_E(v) (((v) << 0) & GENMASK(31, 0)) + +#define VDPU_REG_STRM_START_BIT(v) (((v) << 0) & GENMASK(5, 0)) + +#define VDPU_REG_CH_QP_OFFSET2(v) (((v) << 22) & GENMASK(26, 22)) +#define VDPU_REG_CH_QP_OFFSET(v) (((v) << 17) & GENMASK(21, 17)) +#define VDPU_REG_PIC_MB_HEIGHT_P(v) (((v) << 9) & GENMASK(16, 9)) +#define VDPU_REG_PIC_MB_WIDTH(v) (((v) << 0) & GENMASK(8, 0)) + +#define VDPU_REG_WEIGHT_BIPR_IDC(v) (((v) << 16) & GENMASK(17, 16)) +#define VDPU_REG_REF_FRAMES(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_FILT_CTRL_PRES(v) ((v) ? BIT(31) : 0) +#define VDPU_REG_RDPIC_CNT_PRES(v) ((v) ? BIT(30) : 0) +#define VDPU_REG_FRAMENUM_LEN(v) (((v) << 16) & GENMASK(20, 16)) +#define VDPU_REG_FRAMENUM(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFPIC_MK_LEN(v) (((v) << 16) & GENMASK(26, 16)) +#define VDPU_REG_IDR_PIC_ID(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_PPS_ID(v) (((v) << 24) & GENMASK(31, 24)) +#define VDPU_REG_REFIDX1_ACTIVE(v) (((v) << 19) & GENMASK(23, 19)) +#define VDPU_REG_REFIDX0_ACTIVE(v) (((v) << 14) & GENMASK(18, 14)) +#define VDPU_REG_POC_LENGTH(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_IDR_PIC_E(v) ((v) ? BIT(8) : 0) +#define VDPU_REG_DIR_8X8_INFER_E(v) ((v) ? BIT(7) : 0) +#define VDPU_REG_BLACKWHITE_E(v) ((v) ? BIT(6) : 0) +#define VDPU_REG_CABAC_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_WEIGHT_PRED_E(v) ((v) ? BIT(4) : 0) +#define VDPU_REG_CONST_INTRA_E(v) ((v) ? BIT(3) : 0) +#define VDPU_REG_8X8TRANS_FLAG_E(v) ((v) ? BIT(2) : 0) +#define VDPU_REG_TYPE1_QUANT_E(v) ((v) ? BIT(1) : 0) +#define VDPU_REG_FIELDPIC_FLAG_E(v) ((v) ? BIT(0) : 0) + +void rk3399_vpu_h264_dec_run(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + const struct hantro_h264_dec_ctrls *ctrls; + const struct v4l2_ctrl_h264_decode_params *decode; + const struct v4l2_ctrl_h264_slice_params *slices; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pps *pps; + const u8 *b0_reflist, *b1_reflist, *p_reflist; + dma_addr_t addr; + size_t offset = 0; + u32 reg; + + /* Prepare the H264 decoder context. */ + if (hantro_h264_dec_prepare_run(ctx)) + return; + + src_buf = hantro_get_src_buf(ctx); + dst_buf = hantro_get_dst_buf(ctx); + + ctrls = &ctx->h264_dec.ctrls; + decode = ctrls->decode; + slices = ctrls->slices; + sps = ctrls->sps; + pps = ctrls->pps; + + b0_reflist = ctx->h264_dec.reflists.b0; + b1_reflist = ctx->h264_dec.reflists.b1; + p_reflist = ctx->h264_dec.reflists.p; + + reg = VDPU_REG_DEC_ADV_PRE_DIS(0) | + VDPU_REG_DEC_SCMD_DIS(0) | + VDPU_REG_FILTERING_DIS(0) | + VDPU_REG_PIC_FIXED_QUANT(0) | + VDPU_REG_DEC_LATENCY(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(50)); + + reg = VDPU_REG_INIT_QP(pps->pic_init_qp_minus26 + 26) | + VDPU_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(51)); + + reg = VDPU_REG_APF_THRESHOLD(8) | + VDPU_REG_STARTMB_X(0) | + VDPU_REG_STARTMB_Y(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(52)); + + reg = VDPU_REG_DEC_MODE(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(53)); + + reg = VDPU_REG_DEC_STRENDIAN_E(1) | + VDPU_REG_DEC_STRSWAP32_E(1) | + VDPU_REG_DEC_OUTSWAP32_E(1) | + VDPU_REG_DEC_INSWAP32_E(1) | + VDPU_REG_DEC_OUT_ENDIAN(1) | + VDPU_REG_DEC_IN_ENDIAN(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(54)); + + reg = VDPU_REG_DEC_DATA_DISC_E(0) | + VDPU_REG_DEC_MAX_BURST(16) | + VDPU_REG_DEC_AXI_WR_ID(0) | + VDPU_REG_DEC_AXI_RD_ID(0xff); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(56)); + + reg = VDPU_REG_START_CODE_E(1) | + VDPU_REG_CH_8PIX_ILEAV_E(0) | + VDPU_REG_RLC_MODE_E(0) | + VDPU_REG_PIC_INTERLACE_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) && (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD || slices[0].flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)) | + VDPU_REG_PIC_FIELDMODE_E(slices[0].flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) | + VDPU_REG_PIC_TOPFIELD_E(!(slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)) | + VDPU_REG_WRITE_MVS_E(sps->profile_idc > 66 && decode->nal_ref_idc) | + VDPU_REG_SEQ_MBAFF_E(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) | + VDPU_REG_PICORD_COUNT_E(sps->profile_idc > 66) | + VDPU_REG_DEC_TIMEOUT_E(1) | + VDPU_REG_DEC_CLK_GATE_E(1); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(57)); + + reg = VDPU_REG_PRED_BC_TAP_0_0(1) | + VDPU_REG_PRED_BC_TAP_0_1((u32)-5) | + VDPU_REG_PRED_BC_TAP_0_2(20); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(59)); + + reg = VDPU_REG_REFBU_E(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(65)); + + reg = VDPU_REG_PINIT_RLIST_F9(p_reflist[9]) | + VDPU_REG_PINIT_RLIST_F8(p_reflist[8]) | + VDPU_REG_PINIT_RLIST_F7(p_reflist[7]) | + VDPU_REG_PINIT_RLIST_F6(p_reflist[6]) | + VDPU_REG_PINIT_RLIST_F5(p_reflist[5]) | + VDPU_REG_PINIT_RLIST_F4(p_reflist[4]); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(74)); + + reg = VDPU_REG_PINIT_RLIST_F15(p_reflist[15]) | + VDPU_REG_PINIT_RLIST_F14(p_reflist[14]) | + VDPU_REG_PINIT_RLIST_F13(p_reflist[13]) | + VDPU_REG_PINIT_RLIST_F12(p_reflist[12]) | + VDPU_REG_PINIT_RLIST_F11(p_reflist[11]) | + VDPU_REG_PINIT_RLIST_F10(p_reflist[10]); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(75)); + + reg = VDPU_REG_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, 1)) | + VDPU_REG_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, 0)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(76)); + + reg = VDPU_REG_REFER3_NBR(hantro_h264_get_ref_nbr(ctx, 3)) | + VDPU_REG_REFER2_NBR(hantro_h264_get_ref_nbr(ctx, 2)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(77)); + + reg = VDPU_REG_REFER5_NBR(hantro_h264_get_ref_nbr(ctx, 5)) | + VDPU_REG_REFER4_NBR(hantro_h264_get_ref_nbr(ctx, 4)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(78)); + + reg = VDPU_REG_REFER7_NBR(hantro_h264_get_ref_nbr(ctx, 7)) | + VDPU_REG_REFER6_NBR(hantro_h264_get_ref_nbr(ctx, 6)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(79)); + + reg = VDPU_REG_REFER9_NBR(hantro_h264_get_ref_nbr(ctx, 9)) | + VDPU_REG_REFER8_NBR(hantro_h264_get_ref_nbr(ctx, 8)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(80)); + + reg = VDPU_REG_REFER11_NBR(hantro_h264_get_ref_nbr(ctx, 11)) | + VDPU_REG_REFER10_NBR(hantro_h264_get_ref_nbr(ctx, 10)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(81)); + + reg = VDPU_REG_REFER13_NBR(hantro_h264_get_ref_nbr(ctx, 13)) | + VDPU_REG_REFER12_NBR(hantro_h264_get_ref_nbr(ctx, 12)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(82)); + + reg = VDPU_REG_REFER15_NBR(hantro_h264_get_ref_nbr(ctx, 15)) | + VDPU_REG_REFER14_NBR(hantro_h264_get_ref_nbr(ctx, 14)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(83)); + + reg = VDPU_REG_BINIT_RLIST_F5(b0_reflist[5]) | + VDPU_REG_BINIT_RLIST_F4(b0_reflist[4]) | + VDPU_REG_BINIT_RLIST_F3(b0_reflist[3]) | + VDPU_REG_BINIT_RLIST_F2(b0_reflist[2]) | + VDPU_REG_BINIT_RLIST_F1(b0_reflist[1]) | + VDPU_REG_BINIT_RLIST_F0(b0_reflist[0]); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(100)); + + reg = VDPU_REG_BINIT_RLIST_F11(b0_reflist[11]) | + VDPU_REG_BINIT_RLIST_F10(b0_reflist[10]) | + VDPU_REG_BINIT_RLIST_F9(b0_reflist[9]) | + VDPU_REG_BINIT_RLIST_F8(b0_reflist[8]) | + VDPU_REG_BINIT_RLIST_F7(b0_reflist[7]) | + VDPU_REG_BINIT_RLIST_F6(b0_reflist[6]); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(101)); + + reg = VDPU_REG_BINIT_RLIST_F15(b0_reflist[15]) | + VDPU_REG_BINIT_RLIST_F14(b0_reflist[14]) | + VDPU_REG_BINIT_RLIST_F13(b0_reflist[13]) | + VDPU_REG_BINIT_RLIST_F12(b0_reflist[12]); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(102)); + + reg = VDPU_REG_BINIT_RLIST_B5(b1_reflist[5]) | + VDPU_REG_BINIT_RLIST_B4(b1_reflist[4]) | + VDPU_REG_BINIT_RLIST_B3(b1_reflist[3]) | + VDPU_REG_BINIT_RLIST_B2(b1_reflist[2]) | + VDPU_REG_BINIT_RLIST_B1(b1_reflist[1]) | + VDPU_REG_BINIT_RLIST_B0(b1_reflist[0]); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(103)); + + reg = VDPU_REG_BINIT_RLIST_B11(b1_reflist[11]) | + VDPU_REG_BINIT_RLIST_B10(b1_reflist[10]) | + VDPU_REG_BINIT_RLIST_B9(b1_reflist[9]) | + VDPU_REG_BINIT_RLIST_B8(b1_reflist[8]) | + VDPU_REG_BINIT_RLIST_B7(b1_reflist[7]) | + VDPU_REG_BINIT_RLIST_B6(b1_reflist[6]); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(104)); + + reg = VDPU_REG_BINIT_RLIST_B15(b1_reflist[15]) | + VDPU_REG_BINIT_RLIST_B14(b1_reflist[14]) | + VDPU_REG_BINIT_RLIST_B13(b1_reflist[13]) | + VDPU_REG_BINIT_RLIST_B12(b1_reflist[12]); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(105)); + + reg = VDPU_REG_PINIT_RLIST_F3(p_reflist[3]) | + VDPU_REG_PINIT_RLIST_F2(p_reflist[2]) | + VDPU_REG_PINIT_RLIST_F1(p_reflist[1]) | + VDPU_REG_PINIT_RLIST_F0(p_reflist[0]); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(106)); + + reg = VDPU_REG_REFER_LTERM_E(ctx->h264_dec.dpb_longterm); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(107)); + + reg = VDPU_REG_REFER_VALID_E(ctx->h264_dec.dpb_valid); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(108)); + + reg = VDPU_REG_STRM_START_BIT(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(109)); + + reg = VDPU_REG_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset) | + VDPU_REG_CH_QP_OFFSET(pps->chroma_qp_index_offset) | + VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) | + VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(110)); + + reg = VDPU_REG_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc) | + VDPU_REG_REF_FRAMES(sps->max_num_ref_frames); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(111)); + + reg = VDPU_REG_FILT_CTRL_PRES(pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT) | + VDPU_REG_RDPIC_CNT_PRES(pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT) | + VDPU_REG_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) | + VDPU_REG_FRAMENUM(slices[0].frame_num); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(112)); + + reg = VDPU_REG_REFPIC_MK_LEN(slices[0].dec_ref_pic_marking_bit_size) | + VDPU_REG_IDR_PIC_ID(slices[0].idr_pic_id); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(113)); + + reg = VDPU_REG_PPS_ID(slices[0].pic_parameter_set_id) | + VDPU_REG_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) | + VDPU_REG_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) | + VDPU_REG_POC_LENGTH(slices[0].pic_order_cnt_bit_size); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(114)); + + reg = VDPU_REG_IDR_PIC_E(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) | + VDPU_REG_DIR_8X8_INFER_E(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) | + VDPU_REG_BLACKWHITE_E(sps->profile_idc >= 100 && sps->chroma_format_idc == 0) | + VDPU_REG_CABAC_E(pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) | + VDPU_REG_WEIGHT_PRED_E(pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) | + VDPU_REG_CONST_INTRA_E(pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) | + VDPU_REG_8X8TRANS_FLAG_E(pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) | + VDPU_REG_TYPE1_QUANT_E(1) | + VDPU_REG_FIELDPIC_FLAG_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(115)); + + /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */ + vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, VDPU_REG_QTABLE_BASE); + + /* Source (stream) buffer. */ + addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + vdpu_write_relaxed(vpu, addr, VDPU_REG_RLC_VLC_BASE); + + /* Destination (decoded frame) buffer. */ + addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); + /* Adjust dma addr to start at second line for bottom field */ + if (slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD) + offset = ALIGN(ctx->src_fmt.width, MB_DIM); + vdpu_write_relaxed(vpu, addr + offset, VDPU_REG_DEC_OUT_BASE); + + /* Higher profiles require DMV buffer appended to reference frames. */ + if (sps->profile_idc > 66 && decode->nal_ref_idc) { + unsigned int bytes_per_mb = 384; + + /* DMV buffer for monochrome start directly after Y-plane */ + if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0) + bytes_per_mb = 256; + offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) * + MB_HEIGHT(ctx->src_fmt.height); + + /* + * DMV buffer is split in two for field encoded frames, + * adjust offset for bottom field + */ + if (slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD) + offset += 32 * MB_WIDTH(ctx->src_fmt.width) * + MB_HEIGHT(ctx->src_fmt.height); + vdpu_write_relaxed(vpu, addr + offset, VDPU_REG_DIR_MV_BASE); + } + + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 0), VDPU_REG_REFER0_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 1), VDPU_REG_REFER1_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 2), VDPU_REG_REFER2_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 3), VDPU_REG_REFER3_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 4), VDPU_REG_REFER4_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 5), VDPU_REG_REFER5_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 6), VDPU_REG_REFER6_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 7), VDPU_REG_REFER7_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 8), VDPU_REG_REFER8_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 9), VDPU_REG_REFER9_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 10), VDPU_REG_REFER10_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 11), VDPU_REG_REFER11_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 12), VDPU_REG_REFER12_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 13), VDPU_REG_REFER13_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 14), VDPU_REG_REFER14_BASE); + vdpu_write_relaxed(vpu, hantro_h264_get_ref_buf(ctx, 15), VDPU_REG_REFER15_BASE); + + hantro_finish_run(ctx); + + /* Start decoding! */ + reg = vdpu_read(vpu, VDPU_SWREG(57)) | VDPU_REG_DEC_E(1); + vdpu_write(vpu, reg, VDPU_SWREG(57)); +} diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c b/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c index 4c2d43fb6fd14..067892345b5d0 100644 --- a/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c +++ b/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c @@ -118,7 +118,7 @@ void rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx) src_buf = hantro_get_src_buf(ctx); dst_buf = hantro_get_dst_buf(ctx); - hantro_start_prepare_run(ctx); + hantro_prepare_run(ctx); memset(&jpeg_ctx, 0, sizeof(jpeg_ctx)); jpeg_ctx.buffer = vb2_plane_vaddr(&dst_buf->vb2_buf, 0); @@ -156,6 +156,6 @@ void rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx) | VEPU_REG_ENCODE_ENABLE; /* Kick the watchdog and start encoding */ - hantro_end_prepare_run(ctx); + hantro_finish_run(ctx); vepu_write(vpu, reg, VEPU_REG_ENCODE_START); } diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c index 7e9aad6714896..62a4e6ad7aa89 100644 --- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c +++ b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c @@ -127,7 +127,7 @@ rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu, current_addr = addr; if (picture->picture_structure == PICT_BOTTOM_FIELD) - addr += ALIGN(ctx->dst_fmt.width, 16); + addr += ALIGN(ctx->src_fmt.width, MB_DIM); vdpu_write_relaxed(vpu, addr, VDPU_REG_DEC_OUT_BASE); if (!forward_addr) @@ -169,7 +169,7 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx) src_buf = hantro_get_src_buf(ctx); dst_buf = hantro_get_dst_buf(ctx); - hantro_start_prepare_run(ctx); + hantro_prepare_run(ctx); slice_params = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS); @@ -220,8 +220,8 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx) VDPU_REG_DEC_CLK_GATE_E(1); vdpu_write_relaxed(vpu, reg, VDPU_SWREG(57)); - reg = VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->dst_fmt.width)) | - VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->dst_fmt.height)) | + reg = VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) | + VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) | VDPU_REG_ALT_SCAN_E(picture->alternate_scan) | VDPU_REG_TOPFIELDFIRST_E(picture->top_field_first); vdpu_write_relaxed(vpu, reg, VDPU_SWREG(120)); @@ -250,7 +250,7 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx) sequence, picture, slice_params); /* Kick the watchdog and start decoding */ - hantro_end_prepare_run(ctx); + hantro_finish_run(ctx); reg = vdpu_read(vpu, VDPU_SWREG(57)) | VDPU_REG_DEC_E(1); vdpu_write(vpu, reg, VDPU_SWREG(57)); diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c index a4a792f00b111..458f1f1f43ad2 100644 --- a/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c +++ b/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c @@ -508,12 +508,12 @@ void rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx) { const struct v4l2_ctrl_vp8_frame_header *hdr; struct hantro_dev *vpu = ctx->dev; - size_t height = ctx->dst_fmt.height; - size_t width = ctx->dst_fmt.width; + size_t height = ctx->src_fmt.height; + size_t width = ctx->src_fmt.width; u32 mb_width, mb_height; u32 reg; - hantro_start_prepare_run(ctx); + hantro_prepare_run(ctx); hdr = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER); if (WARN_ON(!hdr)) @@ -587,7 +587,7 @@ void rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx) cfg_ref(ctx, hdr); cfg_buffers(ctx, hdr); - hantro_end_prepare_run(ctx); + hantro_finish_run(ctx); hantro_reg_write(vpu, &vp8_dec_start_dec, 1); } diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h index e877bf1d537cd..76020ebd1e6c6 100644 --- a/include/media/h264-ctrls.h +++ b/include/media/h264-ctrls.h @@ -185,6 +185,10 @@ struct v4l2_ctrl_h264_slice_params { #define V4L2_H264_DPB_ENTRY_FLAG_VALID 0x01 #define V4L2_H264_DPB_ENTRY_FLAG_ACTIVE 0x02 #define V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM 0x04 +#define V4L2_H264_DPB_ENTRY_FLAG_FIELD_PICTURE 0x08 +#define V4L2_H264_DPB_ENTRY_FLAG_REF_TOP 0x10 +#define V4L2_H264_DPB_ENTRY_FLAG_REF_BOTTOM 0x20 +#define V4L2_H264_DPB_ENTRY_FLAG_REF_FRAME 0x30 struct v4l2_h264_dpb_entry { __u64 reference_ts; -- GitLab