mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-11 08:13:06 +00:00
vulkan_ffv1: add Bayer decoder
Sponsored-by: Sovereign Tech Fund
This commit is contained in:
@@ -41,6 +41,15 @@ void ff_ffv1_vk_set_common_sl(AVCodecContext *avctx, FFV1Context *f,
|
||||
}
|
||||
|
||||
int bits = desc->comp[0].depth;
|
||||
/* Bayer pixfmts report misleading per-component depth in comp[0].depth
|
||||
* (it counts the fraction of bits each component contributes per output
|
||||
* pixel, not the per-sample bit width). Use bits_per_raw_sample. The
|
||||
* encoder fills f->bits_per_raw_sample directly; the decoder only
|
||||
* fills f->avctx->bits_per_raw_sample. Prefer the FFV1Context field
|
||||
* with the avctx field as a fallback so this works from both sides. */
|
||||
if (f->bayer)
|
||||
bits = f->bits_per_raw_sample ? f->bits_per_raw_sample
|
||||
: f->avctx->bits_per_raw_sample;
|
||||
SPEC_LIST_ADD(sl, 5, 32, (uint32_t)(1ULL << bits));
|
||||
SPEC_LIST_ADD(sl, 6, 32, f->colorspace);
|
||||
SPEC_LIST_ADD(sl, 7, 32, f->transparency);
|
||||
|
||||
@@ -25,7 +25,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \
|
||||
vulkan/ffv1_dec_rgb.comp.spv.o \
|
||||
vulkan/ffv1_dec_rgb_golomb.comp.spv.o \
|
||||
vulkan/ffv1_dec_rgb_float.comp.spv.o \
|
||||
vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o
|
||||
vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o \
|
||||
vulkan/ffv1_dec_bayer.comp.spv.o \
|
||||
vulkan/ffv1_dec_bayer_golomb.comp.spv.o
|
||||
|
||||
OBJS-$(CONFIG_PRORES_KS_VULKAN_ENCODER) += vulkan/prores_ks_alpha_data.comp.spv.o \
|
||||
vulkan/prores_ks_slice_data.comp.spv.o \
|
||||
|
||||
@@ -122,6 +122,8 @@ layout (set = 1, binding = 0, scalar) SB_QUALI buffer slice_ctx_buf {
|
||||
uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
|
||||
{
|
||||
uint mpw = 1 << chroma_shift;
|
||||
if (colorspace == 2)
|
||||
mpw = max(mpw, 2u);
|
||||
uint awidth = align(width, mpw);
|
||||
|
||||
if ((version < 4) || ((version == 4) && (micro_version < 3)))
|
||||
|
||||
@@ -247,6 +247,43 @@ void decode_line(ivec2 sp, int w,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BAYER
|
||||
/*
 * Combine one line of the four intermediate planes (dec[0..3]) into 2x2
 * blocks of the output image dst[0].
 *
 * slice_idx - not referenced in this function body.
 * sc        - slice context; slice_pos, slice_coding_mode and
 *             slice_rct_coef are read from it.
 * sp        - base position added to LADDR(...) when reading dec[].
 * w         - number of columns to process; each column yields one
 *             2x2 output block.
 * y         - line index; it selects the dec[] row through LADDR(...)
 *             and output rows 2*y and 2*y + 1 relative to sc.slice_pos.
 */
void writeout_bayer(uint slice_idx, in SliceContext sc, ivec2 sp, int w, int y)
|
||||
{
|
||||
/* NOTE(review): presumably makes the preceding line-decode writes to dec[]
 * visible to every invocation in the workgroup before they are read back
 * below - confirm against decode_line()/decode_line_pcm(). */
memoryBarrierImage();
|
||||
barrier();
|
||||
|
||||
int offset = rct_offset;
|
||||
|
||||
/* Columns are distributed over the workgroup: each invocation starts at its
 * local x index and advances by the workgroup width. */
for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) {
|
||||
/* lpos: read position in the dec[] planes. */
ivec2 lpos = sp + LADDR(ivec2(x, y));
|
||||
/* pos: top-left corner of this column's 2x2 block in dst[0]. */
ivec2 pos = sc.slice_pos + ivec2(int(x) << 1, y << 1);
|
||||
|
||||
/* Only the first component of each intermediate plane is used. */
int g_r = int(imageLoad(dec[0], lpos)[0]);
|
||||
int g_b = int(imageLoad(dec[1], lpos)[0]);
|
||||
int b = int(imageLoad(dec[2], lpos)[0]);
|
||||
int r = int(imageLoad(dec[3], lpos)[0]);
|
||||
|
||||
/* Coding mode 1 skips this branch, so its samples are stored exactly as
 * loaded. Every other mode has the transform undone here. */
if (sc.slice_coding_mode != 1) {
|
||||
/* b and r were loaded as offset-biased differences; remove the bias. */
b -= offset;
|
||||
r -= offset;
|
||||
/* Correct the value loaded from dec[0] using the per-slice coefficients. */
g_r -= (b*sc.slice_rct_coef.g + r*sc.slice_rct_coef.r) >> 2;
|
||||
/* Turn the differences back into absolute b and r. */
b += g_r;
|
||||
r += g_r;
|
||||
|
||||
/* dec[1] holds the offset-biased difference between the two greens;
 * after these three statements g_r - g_b == gd. */
int gd = g_b - offset;
|
||||
g_b = g_r - (gd >> 1);
|
||||
g_r = g_b + gd;
|
||||
}
|
||||
|
||||
/* 2x2 block layout written to dst[0]:
 *   (0,0) = r    (1,0) = g_r
 *   (0,1) = g_b  (1,1) = b */
imageStore(dst[0], pos + ivec2(0, 0), uvec4(r));
|
||||
imageStore(dst[0], pos + ivec2(1, 0), uvec4(g_r));
|
||||
imageStore(dst[0], pos + ivec2(0, 1), uvec4(g_b));
|
||||
imageStore(dst[0], pos + ivec2(1, 1), uvec4(b));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef RGB
|
||||
ivec4 transform_sample(ivec4 pix, ivec2 rct_coef, int offset)
|
||||
{
|
||||
@@ -319,14 +356,32 @@ void decode_slice(in SliceContext sc, uint slice_idx)
|
||||
ivec2 sp = sc.slice_pos;
|
||||
u16vec4 bits = get_slice_bits(sc);
|
||||
|
||||
#ifdef RGB
|
||||
#ifdef BAYER
|
||||
/* Bayer logical dims: 2x2 blocks at half resolution */
|
||||
w >>= 1;
|
||||
int bayer_h = sc.slice_dim.y >> 1;
|
||||
sp.x >>= 1;
|
||||
sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
|
||||
/* c_bits = bps + 1 (the +1 is for is_rgb). For PCM mode, all planes use
|
||||
* raw bps. For non-PCM, gm uses bps; gd/b-gm/r-gm use bps+1. */
|
||||
if (sc.slice_coding_mode == 0)
|
||||
bits = u16vec4(c_bits - 1, c_bits, c_bits, c_bits);
|
||||
else
|
||||
bits = u16vec4(c_bits - 1, c_bits - 1, c_bits - 1, c_bits - 1);
|
||||
#elif defined(RGB)
|
||||
sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
|
||||
#endif
|
||||
|
||||
#ifndef GOLOMB
|
||||
/* PCM coding */
|
||||
if (sc.slice_coding_mode == 1) {
|
||||
#ifdef RGB
|
||||
#ifdef BAYER
|
||||
for (int y = 0; y < bayer_h; y++) {
|
||||
for (int p = 0; p < 4; p++)
|
||||
decode_line_pcm(sp, w, y, p);
|
||||
writeout_bayer(slice_idx, sc, sp, w, y);
|
||||
}
|
||||
#elif defined(RGB)
|
||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||
for (int p = 0; p < color_planes; p++)
|
||||
decode_line_pcm(sp, w, y, p);
|
||||
@@ -347,16 +402,31 @@ void decode_slice(in SliceContext sc, uint slice_idx)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BAYER
|
||||
u8vec4 quant_table_idx = sc.quant_table_idx.xzyy;
|
||||
u32vec4 slice_state_off = (slice_idx*codec_planes +
|
||||
uvec4(0, 2, 1, 1))*plane_state_size;
|
||||
#else
|
||||
u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
|
||||
u32vec4 slice_state_off = (slice_idx*codec_planes +
|
||||
uvec4(0, 1, 1, 2))*plane_state_size;
|
||||
#endif
|
||||
|
||||
#ifdef GOLOMB
|
||||
slice_state_off >>= 3; // division by VLC_STATE_SIZE
|
||||
golomb_init();
|
||||
#endif
|
||||
|
||||
#ifdef RGB
|
||||
#ifdef BAYER
|
||||
int run_index = 0;
|
||||
for (int y = 0; y < bayer_h; y++) {
|
||||
for (int p = 0; p < 4; p++)
|
||||
decode_line(sp, w, y, p, bits[p],
|
||||
slice_state_off[p], quant_table_idx[p], run_index);
|
||||
|
||||
writeout_bayer(slice_idx, sc, sp, w, y);
|
||||
}
|
||||
#elif defined(RGB)
|
||||
int run_index = 0;
|
||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||
for (int p = 0; p < color_planes; p++)
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2026 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
|
||||
layout (set = 1, binding = 5) writeonly uniform uimage2D dst[];
|
||||
|
||||
#define RGB
|
||||
#define BAYER
|
||||
#include "ffv1_dec.comp.glsl"
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2026 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
|
||||
layout (set = 1, binding = 5) writeonly uniform uimage2D dst[];
|
||||
|
||||
#define RGB
|
||||
#define BAYER
|
||||
#define GOLOMB
|
||||
#include "ffv1_dec.comp.glsl"
|
||||
@@ -191,7 +191,7 @@ bool decode_slice_header(uint slice_idx, inout SliceContext sc)
|
||||
if (version >= 4) {
|
||||
sc.slice_reset_contexts = get_rac(rc_state[0]);
|
||||
sc.slice_coding_mode = get_usymbol(0);
|
||||
if (sc.slice_coding_mode != 1 && colorspace == 1) {
|
||||
if (sc.slice_coding_mode != 1 && colorspace != 0) {
|
||||
sc.slice_rct_coef.g = int(get_usymbol(0));
|
||||
sc.slice_rct_coef.r = int(get_usymbol(0));
|
||||
if (sc.slice_rct_coef.g + sc.slice_rct_coef.r > 4)
|
||||
|
||||
@@ -35,7 +35,7 @@ void update_vlc_state(inout VlcState state, in int v)
|
||||
int drift = state.drift;
|
||||
int count = state.count;
|
||||
int bias = state.bias;
|
||||
state.error_sum += uint16_t(abs(v));
|
||||
state.error_sum += uint32_t(abs(v));
|
||||
drift += v;
|
||||
|
||||
if (count == 128) { // FIXME: variable
|
||||
|
||||
@@ -54,6 +54,12 @@ extern const unsigned int ff_ffv1_dec_rgb_float_comp_spv_len;
|
||||
extern const unsigned char ff_ffv1_dec_rgb_float_golomb_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_rgb_float_golomb_comp_spv_len;
|
||||
|
||||
extern const unsigned char ff_ffv1_dec_bayer_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_bayer_comp_spv_len;
|
||||
|
||||
extern const unsigned char ff_ffv1_dec_bayer_golomb_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_bayer_golomb_comp_spv_len;
|
||||
|
||||
const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
|
||||
.codec_id = AV_CODEC_ID_FFV1,
|
||||
.queue_flags = VK_QUEUE_COMPUTE_BIT,
|
||||
@@ -393,7 +399,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
nb_img_bar = 0;
|
||||
nb_buf_bar = 0;
|
||||
|
||||
for (int i = 0; i < color_planes; i++)
|
||||
/* The intermediate frame has 4 planes (GBRAP16/32). Clear all of
|
||||
* them since the bayer decoder uses all four. */
|
||||
int n_dec_planes = f->bayer ? 4 : color_planes;
|
||||
for (int i = 0; i < n_dec_planes; i++)
|
||||
vk->CmdClearColorImage(exec->buf, vkf->img[i], VK_IMAGE_LAYOUT_GENERAL,
|
||||
&((VkClearColorValue) { 0 }),
|
||||
1, &((VkImageSubresourceRange) {
|
||||
@@ -519,7 +528,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
1, 5,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
if (fltmap_buf)
|
||||
if (fltmap_buf && !f->bayer)
|
||||
ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
|
||||
1, 6, 0,
|
||||
fltmap_buf,
|
||||
@@ -651,7 +660,8 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
|
||||
FFVkExecPool *pool, FFVulkanShader *shd,
|
||||
AVHWFramesContext *dec_frames_ctx,
|
||||
AVHWFramesContext *out_frames_ctx,
|
||||
VkSpecializationInfo *sl, int ac, int rgb)
|
||||
VkSpecializationInfo *sl, int ac, int rgb,
|
||||
int bayer)
|
||||
{
|
||||
int err;
|
||||
|
||||
@@ -707,10 +717,19 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
|
||||
},
|
||||
};
|
||||
ff_vk_shader_add_descriptor_set(s, shd, desc_set,
|
||||
5 + rgb + (f->micro_version >= 9),
|
||||
5 + rgb + (!bayer && f->micro_version >= 9),
|
||||
0, 0);
|
||||
|
||||
if (f->version >=4 && f->micro_version >= 9) {
|
||||
if (bayer) {
|
||||
if (ac == AC_GOLOMB_RICE)
|
||||
ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_bayer_golomb_comp_spv_data,
|
||||
ff_ffv1_dec_bayer_golomb_comp_spv_len, "main");
|
||||
else
|
||||
ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_bayer_comp_spv_data,
|
||||
ff_ffv1_dec_bayer_comp_spv_len, "main");
|
||||
} else if (f->version >=4 && f->micro_version >= 9) {
|
||||
if (ac == AC_GOLOMB_RICE)
|
||||
ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_rgb_float_golomb_comp_spv_data,
|
||||
@@ -809,7 +828,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
|
||||
FFv1VulkanDecodeContext *fv;
|
||||
|
||||
if (f->version < 3 ||
|
||||
(f->version == 4 && f->micro_version >= 10))
|
||||
(f->version == 4 && f->micro_version >= 10 && !f->bayer))
|
||||
return AVERROR(ENOTSUP);
|
||||
|
||||
/* Streams with a low amount of slices will usually be much slower
|
||||
@@ -861,7 +880,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
|
||||
|
||||
/* Decode shaders */
|
||||
RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode,
|
||||
dctx, hwfc, sl, f->ac, is_rgb));
|
||||
dctx, hwfc, sl, f->ac, is_rgb, f->bayer));
|
||||
|
||||
/* Init static data */
|
||||
RET(ff_ffv1_vk_init_consts(&ctx->s, &fv->consts_buf, f));
|
||||
|
||||
Reference in New Issue
Block a user