aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/media/sunxi/cedrus/cedrus_h264.c')
-rw-r--r--drivers/staging/media/sunxi/cedrus/cedrus_h264.c147
1 files changed, 130 insertions, 17 deletions
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
index d6a782703c9b..bfb4a4820a67 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
@@ -6,6 +6,7 @@
* Copyright (c) 2018 Bootlin
*/
+#include <linux/delay.h>
#include <linux/types.h>
#include <media/videobuf2-dma-contig.h>
@@ -38,7 +39,7 @@ struct cedrus_h264_sram_ref_pic {
#define CEDRUS_H264_FRAME_NUM 18
#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K)
-#define CEDRUS_PIC_INFO_BUF_SIZE (128 * SZ_1K)
+#define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K)
static void cedrus_h264_write_sram(struct cedrus_dev *dev,
enum cedrus_h264_sram_off off,
@@ -96,7 +97,7 @@ static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
- struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
+ struct vb2_queue *cap_q;
struct cedrus_buffer *output_buf;
struct cedrus_dev *dev = ctx->dev;
unsigned long used_dpbs = 0;
@@ -104,6 +105,8 @@ static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
unsigned int output = 0;
unsigned int i;
+ cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+
memset(pic_list, 0, sizeof(pic_list));
for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
@@ -167,12 +170,14 @@ static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
enum cedrus_h264_sram_off sram)
{
const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
- struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
+ struct vb2_queue *cap_q;
struct cedrus_dev *dev = ctx->dev;
u8 sram_array[CEDRUS_MAX_REF_IDX];
unsigned int i;
size_t size;
+ cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+
memset(sram_array, 0, sizeof(sram_array));
for (i = 0; i < num_ref; i++) {
@@ -240,8 +245,8 @@ static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
sizeof(scaling->scaling_list_8x8[0]));
cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
- scaling->scaling_list_8x8[3],
- sizeof(scaling->scaling_list_8x8[3]));
+ scaling->scaling_list_8x8[1],
+ sizeof(scaling->scaling_list_8x8[1]));
cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
scaling->scaling_list_4x4,
@@ -289,6 +294,28 @@ static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
}
}
+/*
+ * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
+ * rare cases frame is not decoded correctly. However, setting offset to 0 and
+ * skipping appropriate amount of bits with flush bits trigger always works.
+ */
+static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
+{
+ int count = 0;
+
+ while (count < num) {
+ int tmp = min(num - count, 32);
+
+ cedrus_write(dev, VE_H264_TRIGGER_TYPE,
+ VE_H264_TRIGGER_TYPE_FLUSH_BITS |
+ VE_H264_TRIGGER_TYPE_N_BITS(tmp));
+ while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
+ udelay(1);
+
+ count += tmp;
+ }
+}
+
static void cedrus_set_params(struct cedrus_ctx *ctx,
struct cedrus_run *run)
{
@@ -299,12 +326,13 @@ static void cedrus_set_params(struct cedrus_ctx *ctx,
struct vb2_buffer *src_buf = &run->src->vb2_buf;
struct cedrus_dev *dev = ctx->dev;
dma_addr_t src_buf_addr;
- u32 offset = slice->header_bit_size;
- u32 len = (slice->size * 8) - offset;
+ u32 len = slice->size * 8;
+ unsigned int pic_width_in_mbs;
+ bool mbaff_pic;
u32 reg;
cedrus_write(dev, VE_H264_VLD_LEN, len);
- cedrus_write(dev, VE_H264_VLD_OFFSET, offset);
+ cedrus_write(dev, VE_H264_VLD_OFFSET, 0);
src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
cedrus_write(dev, VE_H264_VLD_END,
@@ -314,6 +342,20 @@ static void cedrus_set_params(struct cedrus_ctx *ctx,
VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
VE_H264_VLD_ADDR_LAST);
+ if (ctx->src_fmt.width > 2048) {
+ cedrus_write(dev, VE_BUF_CTRL,
+ VE_BUF_CTRL_INTRAPRED_MIXED_RAM |
+ VE_BUF_CTRL_DBLK_MIXED_RAM);
+ cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR,
+ ctx->codec.h264.deblk_buf_dma);
+ cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR,
+ ctx->codec.h264.intra_pred_buf_dma);
+ } else {
+ cedrus_write(dev, VE_BUF_CTRL,
+ VE_BUF_CTRL_INTRAPRED_INT_SRAM |
+ VE_BUF_CTRL_DBLK_INT_SRAM);
+ }
+
/*
* FIXME: Since the bitstream parsing is done in software, and
* in userspace, this shouldn't be needed anymore. But it
@@ -323,6 +365,8 @@ static void cedrus_set_params(struct cedrus_ctx *ctx,
cedrus_write(dev, VE_H264_TRIGGER_TYPE,
VE_H264_TRIGGER_TYPE_INIT_SWDEC);
+ cedrus_skip_bits(dev, slice->header_bit_size);
+
if (((pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) &&
(slice->slice_type == V4L2_H264_SLICE_TYPE_P ||
slice->slice_type == V4L2_H264_SLICE_TYPE_SP)) ||
@@ -370,12 +414,20 @@ static void cedrus_set_params(struct cedrus_ctx *ctx,
reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
cedrus_write(dev, VE_H264_SPS, reg);
+ mbaff_pic = !(slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) &&
+ (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
+ pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;
+
// slice parameters
reg = 0;
+ reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
+ reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
+ (mbaff_pic + 1)) & 0xff) << 16;
reg |= decode->nal_ref_idc ? BIT(12) : 0;
reg |= (slice->slice_type & 0xf) << 8;
reg |= slice->cabac_init_idc & 0x3;
- reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
+ if (ctx->fh.m2m_ctx->new_frame)
+ reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
reg |= VE_H264_SHS_FIELD_PIC;
if (slice->flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
@@ -447,7 +499,7 @@ static void cedrus_h264_setup(struct cedrus_ctx *ctx,
{
struct cedrus_dev *dev = ctx->dev;
- cedrus_engine_enable(dev, CEDRUS_CODEC_H264);
+ cedrus_engine_enable(ctx, CEDRUS_CODEC_H264);
cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
@@ -464,18 +516,30 @@ static void cedrus_h264_setup(struct cedrus_ctx *ctx,
static int cedrus_h264_start(struct cedrus_ctx *ctx)
{
struct cedrus_dev *dev = ctx->dev;
+ unsigned int pic_info_size;
unsigned int field_size;
unsigned int mv_col_size;
int ret;
+ /* Formula for picture buffer size is taken from CedarX source. */
+
+ if (ctx->src_fmt.width > 2048)
+ pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000;
+ else
+ pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000;
+
/*
- * FIXME: It seems that the H6 cedarX code is using a formula
- * here based on the size of the frame, while all the older
- * code is using a fixed size, so that might need to be
- * changed at some point.
+ * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set,
+ * there is no need to multiply by 2.
*/
+ pic_info_size += ctx->src_fmt.height * 2 * 64;
+
+ if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE)
+ pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE;
+
+ ctx->codec.h264.pic_info_buf_size = pic_info_size;
ctx->codec.h264.pic_info_buf =
- dma_alloc_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
+ dma_alloc_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
&ctx->codec.h264.pic_info_buf_dma,
GFP_KERNEL);
if (!ctx->codec.h264.pic_info_buf)
@@ -528,15 +592,56 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx)
goto err_neighbor_buf;
}
+ if (ctx->src_fmt.width > 2048) {
+ /*
+ * Formulas for deblock and intra prediction buffer sizes
+ * are taken from CedarX source.
+ */
+
+ ctx->codec.h264.deblk_buf_size =
+ ALIGN(ctx->src_fmt.width, 32) * 12;
+ ctx->codec.h264.deblk_buf =
+ dma_alloc_coherent(dev->dev,
+ ctx->codec.h264.deblk_buf_size,
+ &ctx->codec.h264.deblk_buf_dma,
+ GFP_KERNEL);
+ if (!ctx->codec.h264.deblk_buf) {
+ ret = -ENOMEM;
+ goto err_mv_col_buf;
+ }
+
+ ctx->codec.h264.intra_pred_buf_size =
+ ALIGN(ctx->src_fmt.width, 64) * 5;
+ ctx->codec.h264.intra_pred_buf =
+ dma_alloc_coherent(dev->dev,
+ ctx->codec.h264.intra_pred_buf_size,
+ &ctx->codec.h264.intra_pred_buf_dma,
+ GFP_KERNEL);
+ if (!ctx->codec.h264.intra_pred_buf) {
+ ret = -ENOMEM;
+ goto err_deblk_buf;
+ }
+ }
+
return 0;
+err_deblk_buf:
+ dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
+ ctx->codec.h264.deblk_buf,
+ ctx->codec.h264.deblk_buf_dma);
+
+err_mv_col_buf:
+ dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
+ ctx->codec.h264.mv_col_buf,
+ ctx->codec.h264.mv_col_buf_dma);
+
err_neighbor_buf:
dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
ctx->codec.h264.neighbor_info_buf,
ctx->codec.h264.neighbor_info_buf_dma);
err_pic_buf:
- dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
+ dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
ctx->codec.h264.pic_info_buf,
ctx->codec.h264.pic_info_buf_dma);
return ret;
@@ -552,9 +657,17 @@ static void cedrus_h264_stop(struct cedrus_ctx *ctx)
dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
ctx->codec.h264.neighbor_info_buf,
ctx->codec.h264.neighbor_info_buf_dma);
- dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
+ dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size,
ctx->codec.h264.pic_info_buf,
ctx->codec.h264.pic_info_buf_dma);
+ if (ctx->codec.h264.deblk_buf_size)
+ dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size,
+ ctx->codec.h264.deblk_buf,
+ ctx->codec.h264.deblk_buf_dma);
+ if (ctx->codec.h264.intra_pred_buf_size)
+ dma_free_coherent(dev->dev, ctx->codec.h264.intra_pred_buf_size,
+ ctx->codec.h264.intra_pred_buf,
+ ctx->codec.h264.intra_pred_buf_dma);
}
static void cedrus_h264_trigger(struct cedrus_ctx *ctx)