avcodec/h274: Make H274FilmGrainDatabase a shared object

Right now, the private contexts of every decoder supporting
H.274 film grain synthesis (namely H.264, HEVC and VVC)
contain a H274FilmGrainDatabase; said structure is very large
(700442B before this commit) and takes up the overwhelming
majority of said contexts: Removing it reduces sizeof(H264Context)
by 92.88%, sizeof(HEVCContext) by 97.78% and sizeof(VVCContext)
by 99.86%. This is especially important for H.264 and HEVC
when using frame-threading.

The content of said film grain database does not depend on
any input parameter; it is shareable between all its users and
could be hardcoded in the binary (but isn't, because it is so huge).

This commit adds a database with static storage duration to h274.c
and uses it instead of the elements in the private contexts above.
It is still lazily initialized as-needed; a mutex is used
for the necessary synchronization. An alternative would be to use
an AV_ONCE to initialize the whole database either in the decoders'
init function (which would be wasteful given that most videos
don't use film grain synthesis) or in ff_h274_apply_film_grain().

Reviewed-by: Niklas Haas <ffmpeg@haasn.dev>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2025-09-22 04:54:22 +02:00
parent b5e0d9f648
commit c9168717bf
8 changed files with 38 additions and 35 deletions
+2 -2
View File
@@ -30,10 +30,10 @@
#include "error_resilience.h"
#include "avcodec.h"
#include "h264dec.h"
#include "h274.h"
#include "hwaccel_internal.h"
#include "mpegutils.h"
#include "libavutil/refstruct.h"
#include "thread.h"
#include "threadframe.h"
void ff_h264_unref_picture(H264Picture *pic)
@@ -213,7 +213,7 @@ int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
err = AVERROR_INVALIDDATA;
if (sd) // a decoding error may have happened before the side data could be allocated
err = ff_h274_apply_film_grain(cur->f_grain, cur->f, &h->h274db,
err = ff_h274_apply_film_grain(cur->f_grain, cur->f,
(AVFilmGrainParams *) sd->data);
if (err < 0) {
av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film "
-3
View File
@@ -28,7 +28,6 @@
#ifndef AVCODEC_H264DEC_H
#define AVCODEC_H264DEC_H
#include "libavutil/buffer.h"
#include "libavutil/mem_internal.h"
#include "cabac.h"
@@ -41,7 +40,6 @@
#include "h264dsp.h"
#include "h264pred.h"
#include "h264qpel.h"
#include "h274.h"
#include "mpegutils.h"
#include "threadframe.h"
#include "videodsp.h"
@@ -344,7 +342,6 @@ typedef struct H264Context {
H264DSPContext h264dsp;
H264ChromaContext h264chroma;
H264QpelContext h264qpel;
H274FilmGrainDatabase h274db;
H264Picture DPB[H264_MAX_PICTURE_COUNT];
H264Picture *cur_pic_ptr;
+33 -11
View File
@@ -25,6 +25,8 @@
* @author Niklas Haas <ffmpeg@haasn.xyz>
*/
#include <stdatomic.h>
#include "libavutil/avassert.h"
#include "libavutil/bswap.h"
#include "libavcodec/bswapdsp.h"
@@ -32,9 +34,18 @@
#include "libavutil/imgutils.h"
#include "libavutil/md5.h"
#include "libavutil/mem.h"
#include "libavutil/thread.h"
#include "h274.h"
// Cache of synthesized H.274 film grain patterns. A single instance with
// static storage duration (film_grain_db) is defined in this file.
typedef struct H274FilmGrainDatabase {
// Database of film grain patterns, lazily computed as-needed
int8_t db[13 /* h */][13 /* v */][64][64];
// Bit field recording which db[h][v] slices have already been generated.
// The slice (h, v) maps to bit position h * 13 + v; init_slice() stores it
// in residency[bitpos / 32] at bit (bitpos & 31), so the 13 * 13 = 169
// possible slices need 6 elements. Atomic so that the "already generated"
// check can be done without taking the mutex used during generation.
atomic_uint residency[6];
} H274FilmGrainDatabase;
static H274FilmGrainDatabase film_grain_db;
static const int8_t Gaussian_LUT[2048+4];
static const uint32_t Seed_LUT[256];
static const int8_t R64T[64][64];
@@ -47,8 +58,7 @@ static void prng_shift(uint32_t *state)
*state = (x << 1) | (feedback & 1u);
}
static void init_slice_c(int8_t out[64][64], uint8_t h, uint8_t v,
int16_t tmp[64][64])
static void init_slice_c(int8_t out[64][64], uint8_t h, uint8_t v)
{
static const uint8_t deblock_factors[13] = {
64, 71, 77, 84, 90, 96, 103, 109, 116, 122, 128, 128, 128
@@ -57,6 +67,9 @@ static void init_slice_c(int8_t out[64][64], uint8_t h, uint8_t v,
const uint8_t deblock_coeff = deblock_factors[v];
const uint8_t freq_h = ((h + 3) << 2) - 1;
const uint8_t freq_v = ((v + 3) << 2) - 1;
// Temporary buffer for slice generation
// FIXME: Static or not?
static int16_t tmp[64][64];
uint32_t seed = Seed_LUT[h + v * 13];
// Initialize with random gaussian values, using the output array as a
@@ -106,13 +119,24 @@ static void init_slice_c(int8_t out[64][64], uint8_t h, uint8_t v,
}
}
static void init_slice(H274FilmGrainDatabase *database, uint8_t h, uint8_t v)
static void init_slice(uint8_t h, uint8_t v)
{
if (database->residency[h] & (1 << v))
static AVMutex mutex = AV_MUTEX_INITIALIZER;
unsigned bitpos = h * 13 + v;
unsigned res = atomic_load_explicit(&film_grain_db.residency[bitpos / 32],
memory_order_acquire);
if (res & (1U << (bitpos & 31)))
return;
database->residency[h] |= (1 << v);
init_slice_c(database->db[h][v], h, v, database->slice_tmp);
ff_mutex_lock(&mutex);
res = atomic_load_explicit(&film_grain_db.residency[bitpos / 32], memory_order_relaxed);
if (!(res & (1U << (bitpos & 31)))) {
init_slice_c(film_grain_db.db[h][v], h, v);
atomic_store_explicit(&film_grain_db.residency[bitpos / 32],
res | (1U << (bitpos & 31)), memory_order_release);
}
ff_mutex_unlock(&mutex);
}
// Computes the average of an 8x8 block
@@ -160,7 +184,6 @@ static void deblock_8x8_c(int8_t *out, const int out_stride)
// deblocking step (note that this implies writing to the previous block).
static av_always_inline void generate(int8_t *out, int out_stride,
const uint8_t *in, int in_stride,
H274FilmGrainDatabase *database,
const AVFilmGrainH274Params *h274,
int c, int invert, int deblock,
int y_offset, int x_offset)
@@ -198,14 +221,14 @@ static av_always_inline void generate(int8_t *out, int out_stride,
h = av_clip(h274->comp_model_value[c][s][1], 2, 14) - 2;
v = av_clip(h274->comp_model_value[c][s][2], 2, 14) - 2;
init_slice(database, h, v);
init_slice(h, v);
scale = h274->comp_model_value[c][s][0];
if (invert)
scale = -scale;
synth_grain_8x8_c(out, out_stride, scale, shift,
&database->db[h][v][y_offset][x_offset]);
&film_grain_db.db[h][v][y_offset][x_offset]);
if (deblock)
deblock_8x8_c(out, out_stride);
@@ -220,7 +243,6 @@ static void add_8x8_clip_c(uint8_t *out, const uint8_t *a, const int8_t *b,
}
int ff_h274_apply_film_grain(AVFrame *out_frame, const AVFrame *in_frame,
H274FilmGrainDatabase *database,
const AVFilmGrainParams *params)
{
AVFilmGrainH274Params h274 = params->codec.h274;
@@ -275,7 +297,7 @@ int ff_h274_apply_film_grain(AVFrame *out_frame, const AVFrame *in_frame,
for (int xx = 0; xx < 16 && x+xx < width; xx += 8) {
generate(grain + (y+yy) * grain_stride + (x+xx), grain_stride,
in + (y+yy) * in_stride + (x+xx), in_stride,
database, &h274, c, invert, (x+xx) > 0,
&h274, c, invert, (x+xx) > 0,
y_offset + yy, x_offset + xx);
}
}
-11
View File
@@ -30,16 +30,6 @@
#include "libavutil/film_grain_params.h"
// Must be initialized to {0} prior to first usage
typedef struct H274FilmGrainDatabase {
// Database of film grain patterns, lazily computed as-needed
int8_t db[13 /* h */][13 /* v */][64][64];
uint16_t residency[13 /* h */]; // bit field of v
// Temporary buffer for slice generation
int16_t slice_tmp[64][64];
} H274FilmGrainDatabase;
/**
* Check whether ff_h274_apply_film_grain() supports the given parameter combination.
*
@@ -61,7 +51,6 @@ static inline int ff_h274_film_grain_params_supported(int model_id, enum AVPixel
// ff_h274_film_grain_params_supported() coincide with actual values
// from the frames and params.
int ff_h274_apply_film_grain(AVFrame *out, const AVFrame *in,
H274FilmGrainDatabase *db,
const AVFilmGrainParams *params);
typedef struct H274HashContext H274HashContext;
+2 -2
View File
@@ -45,6 +45,7 @@
#include "codec_internal.h"
#include "decode.h"
#include "golomb.h"
#include "h274.h"
#include "hevc.h"
#include "parse.h"
#include "hevcdec.h"
@@ -3496,8 +3497,7 @@ static int hevc_frame_end(HEVCContext *s, HEVCLayerContext *l)
av_assert0(0);
return AVERROR_BUG;
case AV_FILM_GRAIN_PARAMS_H274:
ret = ff_h274_apply_film_grain(out->frame_grain, out->f,
&s->h274db, fgp);
ret = ff_h274_apply_film_grain(out->frame_grain, out->f, fgp);
break;
case AV_FILM_GRAIN_PARAMS_AV1:
ret = ff_aom_apply_film_grain(out->frame_grain, out->f, fgp);
-3
View File
@@ -32,9 +32,7 @@
#include "libavcodec/bswapdsp.h"
#include "libavcodec/cabac.h"
#include "libavcodec/dovi_rpu.h"
#include "libavcodec/get_bits.h"
#include "libavcodec/h2645_parse.h"
#include "libavcodec/h274.h"
#include "libavcodec/progressframe.h"
#include "libavcodec/videodsp.h"
@@ -537,7 +535,6 @@ typedef struct HEVCContext {
HEVCDSPContext hevcdsp;
VideoDSPContext vdsp;
BswapDSPContext bdsp;
H274FilmGrainDatabase h274db;
/** used on BE to byteswap the lines for checksumming */
uint8_t *checksum_buf;
+1 -2
View File
@@ -1091,8 +1091,7 @@ static int frame_end(VVCContext *s, VVCFrameContext *fc)
av_assert0(0);
return AVERROR_BUG;
case AV_FILM_GRAIN_PARAMS_H274:
ret = ff_h274_apply_film_grain(fc->ref->frame_grain, fc->ref->frame,
&s->h274db, fgp);
ret = ff_h274_apply_film_grain(fc->ref->frame_grain, fc->ref->frame, fgp);
if (ret < 0)
return ret;
break;
-1
View File
@@ -222,7 +222,6 @@ typedef struct VVCContext {
CodedBitstreamFragment current_frame;
VVCParamSets ps;
H274FilmGrainDatabase h274db;
int temporal_id; ///< temporal_id_plus1 - 1
int poc_tid0;