vulkan_ffv1: add Bayer decoder

Sponsored-by: Sovereign Tech Fund
This commit is contained in:
Lynne
2026-06-03 14:12:50 +09:00
parent 151fe3a5ac
commit 713f191c24
9 changed files with 178 additions and 13 deletions
+9
View File
@@ -41,6 +41,15 @@ void ff_ffv1_vk_set_common_sl(AVCodecContext *avctx, FFV1Context *f,
}
int bits = desc->comp[0].depth;
/* Bayer pixfmts report misleading per-component depth in comp[0].depth
* (it counts the fraction of bits each component contributes per output
* pixel, not the per-sample bit width). Use bits_per_raw_sample. The
* encoder fills f->bits_per_raw_sample directly; the decoder only
* fills f->avctx->bits_per_raw_sample. Prefer the FFV1Context field
* with the avctx field as a fallback so this works from both sides. */
if (f->bayer)
bits = f->bits_per_raw_sample ? f->bits_per_raw_sample
: f->avctx->bits_per_raw_sample;
SPEC_LIST_ADD(sl, 5, 32, (uint32_t)(1ULL << bits));
SPEC_LIST_ADD(sl, 6, 32, f->colorspace);
SPEC_LIST_ADD(sl, 7, 32, f->transparency);
+3 -1
View File
@@ -25,7 +25,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \
vulkan/ffv1_dec_rgb.comp.spv.o \
vulkan/ffv1_dec_rgb_golomb.comp.spv.o \
vulkan/ffv1_dec_rgb_float.comp.spv.o \
vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o
vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o \
vulkan/ffv1_dec_bayer.comp.spv.o \
vulkan/ffv1_dec_bayer_golomb.comp.spv.o
OBJS-$(CONFIG_PRORES_KS_VULKAN_ENCODER) += vulkan/prores_ks_alpha_data.comp.spv.o \
vulkan/prores_ks_slice_data.comp.spv.o \
+2
View File
@@ -122,6 +122,8 @@ layout (set = 1, binding = 0, scalar) SB_QUALI buffer slice_ctx_buf {
uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
{
uint mpw = 1 << chroma_shift;
if (colorspace == 2)
mpw = max(mpw, 2u);
uint awidth = align(width, mpw);
if ((version < 4) || ((version == 4) && (micro_version < 3)))
+73 -3
View File
@@ -247,6 +247,43 @@ void decode_line(ivec2 sp, int w,
}
#endif
#ifdef BAYER
/*
 * Write one decoded row of 2x2 Bayer blocks to the output image.
 *
 * The invocations of a workgroup share the row: each one starts at its own
 * gl_LocalInvocationID.x and advances by gl_WorkGroupSize.x. For every block
 * the four samples are loaded from dec[0..3] at the same logical position
 * and stored to dst[0] at twice the logical coordinates, relative to
 * sc.slice_pos.
 *
 * slice_idx  slice index (not used by this function)
 * sc         slice context; slice_pos, slice_coding_mode and slice_rct_coef
 *            are read
 * sp         base position added to LADDR(ivec2(x, y)) when addressing dec[]
 * w          number of 2x2 blocks in the row
 * y          row index, in blocks
 */
void writeout_bayer(uint slice_idx, in SliceContext sc, ivec2 sp, int w, int y)
{
    /* Order image memory accesses and synchronise the workgroup before the
     * dec[] planes are read back below. */
    memoryBarrierImage();
    barrier();

    int bias = rct_offset;
    bool undo_rct = sc.slice_coding_mode != 1;
    int out_row = y << 1;

    for (uint bx = gl_LocalInvocationID.x; bx < w; bx += gl_WorkGroupSize.x) {
        ivec2 src = sp + LADDR(ivec2(bx, y));
        ivec2 out_pos = sc.slice_pos + ivec2(int(bx) << 1, out_row);

        /* Planes 0..3 hold, in order: green (red row), green (blue row),
         * blue and red. When slice_coding_mode != 1 they are still in
         * transformed form and are converted back below. */
        int grn_r = int(imageLoad(dec[0], src)[0]);
        int grn_b = int(imageLoad(dec[1], src)[0]);
        int blu   = int(imageLoad(dec[2], src)[0]);
        int red   = int(imageLoad(dec[3], src)[0]);

        if (undo_rct) {
            /* Remove the bias from the two chroma differences. */
            blu -= bias;
            red -= bias;

            /* grn_r holds the combined green value at this point; undo the
             * weighted chroma contribution, then rebuild blue and red. */
            int chroma_mix = blu*sc.slice_rct_coef.g + red*sc.slice_rct_coef.r;
            grn_r -= chroma_mix >> 2;
            blu += grn_r;
            red += grn_r;

            /* Split the combined green back into its two samples using the
             * biased green difference stored in plane 1. */
            int grn_diff = grn_b - bias;
            grn_b = grn_r - (grn_diff >> 1);
            grn_r = grn_b + grn_diff;
        }

        /* 2x2 output layout:  R  Gr
         *                     Gb B  */
        imageStore(dst[0], out_pos + ivec2(0, 0), uvec4(red));
        imageStore(dst[0], out_pos + ivec2(1, 0), uvec4(grn_r));
        imageStore(dst[0], out_pos + ivec2(0, 1), uvec4(grn_b));
        imageStore(dst[0], out_pos + ivec2(1, 1), uvec4(blu));
    }
}
#endif
#ifdef RGB
ivec4 transform_sample(ivec4 pix, ivec2 rct_coef, int offset)
{
@@ -319,14 +356,32 @@ void decode_slice(in SliceContext sc, uint slice_idx)
ivec2 sp = sc.slice_pos;
u16vec4 bits = get_slice_bits(sc);
#ifdef RGB
#ifdef BAYER
/* Bayer logical dims: 2x2 blocks at half resolution */
w >>= 1;
int bayer_h = sc.slice_dim.y >> 1;
sp.x >>= 1;
sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
/* c_bits = bps + 1 (the +1 is for is_rgb). For PCM mode, all planes use
* raw bps. For non-PCM, gm uses bps; gd/b-gm/r-gm use bps+1. */
if (sc.slice_coding_mode == 0)
bits = u16vec4(c_bits - 1, c_bits, c_bits, c_bits);
else
bits = u16vec4(c_bits - 1, c_bits - 1, c_bits - 1, c_bits - 1);
#elif defined(RGB)
sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
#endif
#ifndef GOLOMB
/* PCM coding */
if (sc.slice_coding_mode == 1) {
#ifdef RGB
#ifdef BAYER
for (int y = 0; y < bayer_h; y++) {
for (int p = 0; p < 4; p++)
decode_line_pcm(sp, w, y, p);
writeout_bayer(slice_idx, sc, sp, w, y);
}
#elif defined(RGB)
for (int y = 0; y < sc.slice_dim.y; y++) {
for (int p = 0; p < color_planes; p++)
decode_line_pcm(sp, w, y, p);
@@ -347,16 +402,31 @@ void decode_slice(in SliceContext sc, uint slice_idx)
}
#endif
#ifdef BAYER
u8vec4 quant_table_idx = sc.quant_table_idx.xzyy;
u32vec4 slice_state_off = (slice_idx*codec_planes +
uvec4(0, 2, 1, 1))*plane_state_size;
#else
u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
u32vec4 slice_state_off = (slice_idx*codec_planes +
uvec4(0, 1, 1, 2))*plane_state_size;
#endif
#ifdef GOLOMB
slice_state_off >>= 3; // division by VLC_STATE_SIZE
golomb_init();
#endif
#ifdef RGB
#ifdef BAYER
int run_index = 0;
for (int y = 0; y < bayer_h; y++) {
for (int p = 0; p < 4; p++)
decode_line(sp, w, y, p, bits[p],
slice_state_off[p], quant_table_idx[p], run_index);
writeout_bayer(slice_idx, sc, sp, w, y);
}
#elif defined(RGB)
int run_index = 0;
for (int y = 0; y < sc.slice_dim.y; y++) {
for (int p = 0; p < color_planes; p++)
@@ -0,0 +1,31 @@
/*
* FFv1 codec
*
* Copyright (c) 2026 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Bayer variant of the FFv1 decode shader: this file only sets up the
 * build-time configuration and then pulls in the shared decoder source. */

/* Tell the shader compiler this is a compute shader. */
#pragma shader_stage(compute)
/* Required for the #include at the end of this file. */
#extension GL_GOOGLE_include_directive : require
/* NOTE(review): presumably needed by image declarations inside the included
 * source; dst below is write-only and is never loaded from here. */
#extension GL_EXT_shader_image_load_formatted : require
/* Output image array (unsigned integer texels), write-only, set 1 binding 5. */
layout (set = 1, binding = 5) writeonly uniform uimage2D dst[];
/* Feature macros, presumably tested with #ifdef in the included decoder
 * source to select the RGB-style path and its Bayer specialisation. */
#define RGB
#define BAYER
#include "ffv1_dec.comp.glsl"
@@ -0,0 +1,32 @@
/*
* FFv1 codec
*
* Copyright (c) 2026 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Bayer + Golomb variant of the FFv1 decode shader: this file only sets up
 * the build-time configuration and then pulls in the shared decoder source. */

/* Tell the shader compiler this is a compute shader. */
#pragma shader_stage(compute)
/* Required for the #include at the end of this file. */
#extension GL_GOOGLE_include_directive : require
/* NOTE(review): presumably needed by image declarations inside the included
 * source; dst below is write-only and is never loaded from here. */
#extension GL_EXT_shader_image_load_formatted : require
/* Output image array (unsigned integer texels), write-only, set 1 binding 5. */
layout (set = 1, binding = 5) writeonly uniform uimage2D dst[];
/* Feature macros, presumably tested with #ifdef/#ifndef in the included
 * decoder source: RGB and BAYER select the Bayer path, GOLOMB selects the
 * Golomb-coded variant instead of the default one. */
#define RGB
#define BAYER
#define GOLOMB
#include "ffv1_dec.comp.glsl"
+1 -1
View File
@@ -191,7 +191,7 @@ bool decode_slice_header(uint slice_idx, inout SliceContext sc)
if (version >= 4) {
sc.slice_reset_contexts = get_rac(rc_state[0]);
sc.slice_coding_mode = get_usymbol(0);
if (sc.slice_coding_mode != 1 && colorspace == 1) {
if (sc.slice_coding_mode != 1 && colorspace != 0) {
sc.slice_rct_coef.g = int(get_usymbol(0));
sc.slice_rct_coef.r = int(get_usymbol(0));
if (sc.slice_rct_coef.g + sc.slice_rct_coef.r > 4)
+1 -1
View File
@@ -35,7 +35,7 @@ void update_vlc_state(inout VlcState state, in int v)
int drift = state.drift;
int count = state.count;
int bias = state.bias;
state.error_sum += uint16_t(abs(v));
state.error_sum += uint32_t(abs(v));
drift += v;
if (count == 128) { // FIXME: variable
+26 -7
View File
@@ -54,6 +54,12 @@ extern const unsigned int ff_ffv1_dec_rgb_float_comp_spv_len;
extern const unsigned char ff_ffv1_dec_rgb_float_golomb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_rgb_float_golomb_comp_spv_len;
extern const unsigned char ff_ffv1_dec_bayer_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_bayer_comp_spv_len;
extern const unsigned char ff_ffv1_dec_bayer_golomb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_bayer_golomb_comp_spv_len;
const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
.codec_id = AV_CODEC_ID_FFV1,
.queue_flags = VK_QUEUE_COMPUTE_BIT,
@@ -393,7 +399,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
nb_img_bar = 0;
nb_buf_bar = 0;
for (int i = 0; i < color_planes; i++)
/* The intermediate frame has 4 planes (GBRAP16/32). Clear all of
* them since the bayer decoder uses all four. */
int n_dec_planes = f->bayer ? 4 : color_planes;
for (int i = 0; i < n_dec_planes; i++)
vk->CmdClearColorImage(exec->buf, vkf->img[i], VK_IMAGE_LAYOUT_GENERAL,
&((VkClearColorValue) { 0 }),
1, &((VkImageSubresourceRange) {
@@ -519,7 +528,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
1, 5,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
if (fltmap_buf)
if (fltmap_buf && !f->bayer)
ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
1, 6, 0,
fltmap_buf,
@@ -651,7 +660,8 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
FFVkExecPool *pool, FFVulkanShader *shd,
AVHWFramesContext *dec_frames_ctx,
AVHWFramesContext *out_frames_ctx,
VkSpecializationInfo *sl, int ac, int rgb)
VkSpecializationInfo *sl, int ac, int rgb,
int bayer)
{
int err;
@@ -707,10 +717,19 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
},
};
ff_vk_shader_add_descriptor_set(s, shd, desc_set,
5 + rgb + (f->micro_version >= 9),
5 + rgb + (!bayer && f->micro_version >= 9),
0, 0);
if (f->version >=4 && f->micro_version >= 9) {
if (bayer) {
if (ac == AC_GOLOMB_RICE)
ff_vk_shader_link(s, shd,
ff_ffv1_dec_bayer_golomb_comp_spv_data,
ff_ffv1_dec_bayer_golomb_comp_spv_len, "main");
else
ff_vk_shader_link(s, shd,
ff_ffv1_dec_bayer_comp_spv_data,
ff_ffv1_dec_bayer_comp_spv_len, "main");
} else if (f->version >=4 && f->micro_version >= 9) {
if (ac == AC_GOLOMB_RICE)
ff_vk_shader_link(s, shd,
ff_ffv1_dec_rgb_float_golomb_comp_spv_data,
@@ -809,7 +828,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
FFv1VulkanDecodeContext *fv;
if (f->version < 3 ||
(f->version == 4 && f->micro_version >= 10))
(f->version == 4 && f->micro_version >= 10 && !f->bayer))
return AVERROR(ENOTSUP);
/* Streams with a low amount of slices will usually be much slower
@@ -861,7 +880,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
/* Decode shaders */
RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode,
dctx, hwfc, sl, f->ac, is_rgb));
dctx, hwfc, sl, f->ac, is_rgb, f->bayer));
/* Init static data */
RET(ff_ffv1_vk_init_consts(&ctx->s, &fv->consts_buf, f));