mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-11 08:13:06 +00:00
vulkan/ffv1: read raw 16-bit float images via R16_UINT view to preserve denormals
GPUs may flush denormals when reading float images via imageLoad. Denormals shouldn't be present in general, but if they are, this is a lossless codec, and we have to preserve them. Reading the image through an R16_UINT view instead returns the raw bit pattern, which allows reading the exact values.

Sponsored-by: Sovereign Tech Fund
This commit is contained in:
@@ -368,8 +368,12 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
||||
ff_vk_exec_start(&fv->s, exec);
|
||||
fd->idx = exec->idx;
|
||||
|
||||
/* For float pixel formats we want the raw bit pattern, not a value
|
||||
* already passed through fp16/fp32 conversion (which can flush
|
||||
* denormals). Use a UINT view in that case. */
|
||||
RET(ff_vk_create_imageviews(&fv->s, exec, src_views, src,
|
||||
FF_VK_REP_NATIVE));
|
||||
f->remap_mode ? FF_VK_REP_UINT
|
||||
: FF_VK_REP_NATIVE));
|
||||
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
|
||||
|
||||
@@ -34,13 +34,15 @@ layout (set = 0, binding = 2, scalar) uniform crc_ieee_buf {
|
||||
layout (set = 1, binding = 1, scalar) writeonly buffer slice_results_buf {
|
||||
uint32_t slice_results[];
|
||||
};
|
||||
/* Source images are bound as UINT (raw bits) regardless of the underlying
|
||||
* pixel format. Integer formats are passed through unchanged; for float
|
||||
* formats this avoids the fp16/fp32 conversion that would otherwise flush
|
||||
* denormals before we get to look at them. */
|
||||
layout (set = 1, binding = 3) uniform uimage2D src[];
|
||||
#ifdef FLOAT
|
||||
layout (set = 1, binding = 3) uniform image2D src[];
|
||||
layout (set = 1, binding = 5) readonly buffer fltmap_buf {
|
||||
uint fltmap[][4][65536];
|
||||
};
|
||||
#else
|
||||
layout (set = 1, binding = 3) uniform uimage2D src[];
|
||||
#endif
|
||||
|
||||
#ifndef GOLOMB
|
||||
@@ -237,9 +239,10 @@ ivec4 load_components(uint slice_idx, in SliceContext sc, ivec2 pos)
|
||||
{
|
||||
ivec4 pix;
|
||||
#ifdef FLOAT
|
||||
/* Source view is r16_uint so imageLoad returns the raw fp16 bit pattern
|
||||
* in .x; no conversion is performed and denormals survive. */
|
||||
for (int i = 0; i < color_planes; i++) {
|
||||
float16_t v = float16_t(imageLoad(src[i], pos));
|
||||
uint16_t iv = float16BitsToUint16(v);
|
||||
uint iv = imageLoad(src[i], pos)[0] & 0xFFFFu;
|
||||
pix[i] = int(fltmap[slice_idx][i][iv]);
|
||||
}
|
||||
#else
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#include "common.glsl"
|
||||
#include "ffv1_common.glsl"
|
||||
|
||||
layout (set = 1, binding = 1) uniform image2D src[];
|
||||
layout (set = 1, binding = 1) uniform uimage2D src[];
|
||||
|
||||
layout (set = 1, binding = 2) buffer fltmap_buf {
|
||||
uint fltmap[][4][65536];
|
||||
@@ -53,8 +53,9 @@ void load_fltmap(uint slice_idx, uint p)
|
||||
|
||||
for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += gl_WorkGroupSize.y) {
|
||||
for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += gl_WorkGroupSize.x) {
|
||||
vec4 pix = imageLoad(src[p], ivec2(x, y));
|
||||
uint16_t pix_idx = float16BitsToUint16(float16_t(pix[0]));
|
||||
/* Source view is r16_uint so the .x lane is the raw fp16 bit
|
||||
* pattern; no conversion is performed and denormals survive. */
|
||||
uint pix_idx = imageLoad(src[p], ivec2(x, y))[0] & 0xFFFFu;
|
||||
atomicOr(fltmap[slice_idx][p][pix_idx], 1);
|
||||
}
|
||||
}
|
||||
|
||||
+2
-2
@@ -1927,8 +1927,8 @@ static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt)
|
||||
{
|
||||
VK_FORMAT_R16_SFLOAT,
|
||||
VK_FORMAT_R16_SFLOAT,
|
||||
VK_FORMAT_UNDEFINED,
|
||||
VK_FORMAT_UNDEFINED,
|
||||
VK_FORMAT_R16_SINT,
|
||||
VK_FORMAT_R16_UINT,
|
||||
},
|
||||
};
|
||||
#undef REPS_FMT_PACK
|
||||
|
||||
Reference in New Issue
Block a user