swscale/uops: generate uop helper macros

This follows the same approach as is used currently by ops_entries_aarch64,
except I decided to have the generation logic live directly in uops.c
to allow re-using internal helpers and move it closer to the other helpers
that depend on the exact set of uops and their fields.

Unlike libswscale/tests/sws_ops.c, we make an effort to actually test all
relevant flag combinations, since these can affect the generated op lists.

I will use these macros to auto-generate both the C template-based kernels,
as well as the entire x86 backend, in the near future, hence their excessive
flexibility.

Re-use the libswscale/tests/sws_ops.c that we already compile. We could put it
in its own file but this is just as convenient, and it's easily moved anyways.
Having it be a FATE test ensures that it is always up-to-date.

Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
Niklas Haas
2026-06-09 18:27:20 +02:00
parent 8ad7cc6ccd
commit adaf142647
6 changed files with 1342 additions and 1 deletions
+14
View File
@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/mem.h"
#include "libavutil/pixdesc.h"
#include "libswscale/ops.h"
#include "libswscale/ops_dispatch.h"
@@ -109,6 +110,7 @@ int main(int argc, char **argv)
enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE;
SwsContext *ctx = NULL;
SwsGraph *graph = NULL;
bool macros_gen = false;
int ret = 1;
#ifdef _WIN32
@@ -127,6 +129,8 @@ int main(int argc, char **argv)
" Only test the specified source pixel format\n"
" -v <level>\n"
" Enable log verbosity at given level\n"
" -macros\n"
" Generate helper macros\n"
);
return 0;
}
@@ -153,6 +157,8 @@ int main(int argc, char **argv)
goto bad_option;
av_log_set_level(atoi(argv[i + 1]));
i++;
} else if (!strcmp(argv[i], "-macros")) {
macros_gen = true;
} else {
bad_option:
fprintf(stderr, "bad option or argument missing (%s) see -help\n", argv[i]);
@@ -160,6 +166,14 @@ bad_option:
}
}
if (macros_gen) {
char *macros = NULL;
ret = ff_sws_uops_macros_gen(&macros);
if (ret >= 0)
puts(macros);
av_free(macros);
return ret;
}
/* Allocate dummy graph and context for ff_sws_compile_pass() */
graph = ff_sws_graph_alloc();
if (!graph)
+298 -1
View File
@@ -23,6 +23,7 @@
#include "libavutil/avassert.h"
#include "libavutil/mem.h"
#include "libavutil/refstruct.h"
#include "libavutil/tree.h"
#include "ops.h"
#include "ops_internal.h"
@@ -42,8 +43,9 @@ int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b)
static const struct {
char full[24];
char abbr[16];
char macro[16];
} uop_names[SWS_UOP_TYPE_NB] = {
#define UOP_NAME(OP, ABBR) [SWS_UOP_##OP] = { "SWS_UOP_" #OP, ABBR }
#define UOP_NAME(OP, ABBR) [SWS_UOP_##OP] = { "SWS_UOP_" #OP, ABBR, #OP }
UOP_NAME(INVALID, "invalid"),
UOP_NAME(READ_PLANAR, "read_planar"),
UOP_NAME(READ_PLANAR_FH, "read_planar_fh"),
@@ -79,6 +81,17 @@ static const struct {
#undef UOP_NAME
};
/**
 * Printable names for every SwsPixelType: `full` is the enum constant spelled
 * out as C source, `prefix` is the fragment glued into generated macro names
 * (empty for SWS_PIXEL_NONE).
 */
static const struct {
    char full[16];
    char prefix[8];
} pixel_types[SWS_PIXEL_TYPE_NB] = {
#define PIXEL_TYPE(T, PREFIX) [SWS_PIXEL_##T] = { "SWS_PIXEL_" #T, PREFIX }
    PIXEL_TYPE(NONE, ""),
    PIXEL_TYPE(U8,   "U8_"),
    PIXEL_TYPE(U16,  "U16_"),
    PIXEL_TYPE(U32,  "U32_"),
    PIXEL_TYPE(F32,  "F32_"),
#undef PIXEL_TYPE
};
static SwsPixel pixel_from_q(SwsPixelType type, AVRational val)
{
av_assert1(val.den != 0);
@@ -188,6 +201,101 @@ void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
av_assert0(av_bprint_is_complete(&bp));
}
/**
 * av_tree_enumerate() callback that appends one `MACRO(...)` invocation for
 * a registered uop, with every field written in designated-initializer
 * syntax (`.type = ...`, `.uop = ...`, `.mask = ...`, then any parameters).
 *
 * @param opaque reference SwsUOp; its `data.opaque` carries the output AVBPrint
 * @param key    the SwsUOp tree element to print
 * @return always 0
 */
static int generate_entry_struct(void *opaque, void *key)
{
    const SwsUOp *const elem = key;
    const SwsUOpParams *const p = &elem->par;
    AVBPrint *const out = ((const SwsUOp *) opaque)->data.opaque;

    char label[SWS_UOP_NAME_MAX];
    ff_sws_uop_name(elem, label);

    /* Every entry begins with a line continuation of the enclosing #define */
    av_bprintf(out, " \\\n MACRO(__VA_ARGS__, %-40s", label);
    av_bprintf(out, ", .type = %-13s, .uop = %-24s, .mask = 0x%x",
               pixel_types[elem->type].full, uop_names[elem->uop].full,
               elem->mask);

    /* Only uops that carry parameters get extra initializers */
    switch (elem->uop) {
    case SWS_UOP_LSHIFT:
    case SWS_UOP_RSHIFT:
        av_bprintf(out, ", .par.shift.amount = %u", p->shift.amount);
        break;
    case SWS_UOP_PERMUTE:
    case SWS_UOP_COPY:
        av_bprintf(out, ", .par.swizzle.in = {%d, %d, %d, %d}",
                   p->swizzle.in[0], p->swizzle.in[1],
                   p->swizzle.in[2], p->swizzle.in[3]);
        break;
    case SWS_UOP_PACK:
    case SWS_UOP_UNPACK:
        av_bprintf(out, ", .par.pack.pattern = {%d, %d, %d, %d}",
                   p->pack.pattern[0], p->pack.pattern[1],
                   p->pack.pattern[2], p->pack.pattern[3]);
        break;
    case SWS_UOP_CLEAR:
        av_bprintf(out, ", .par.clear.one = 0x%x, .par.clear.zero = 0x%x",
                   p->clear.one, p->clear.zero);
        break;
    case SWS_UOP_LINEAR:
        av_bprintf(out, ", .par.lin.one = 0x%x, .par.lin.zero = 0x%x",
                   p->lin.one, p->lin.zero);
        break;
    case SWS_UOP_DITHER:
        av_bprintf(out, ", .par.dither = { .y_offset = {%u, %u, %u, %u}, .size_log2 = %u }",
                   p->dither.y_offset[0], p->dither.y_offset[1],
                   p->dither.y_offset[2], p->dither.y_offset[3],
                   p->dither.size_log2);
        break;
    default:
        break;
    }

    av_bprintf(out, ")");
    return 0;
}
/**
 * av_tree_enumerate() callback that appends one `MACRO(...)` invocation for
 * a registered uop, passing its fields as plain comma-separated values in
 * the order: name, pixel type, uop, mask, then any parameters.
 *
 * @param opaque reference SwsUOp; its `data.opaque` carries the output AVBPrint
 * @param key    the SwsUOp tree element to print
 * @return always 0
 */
static int generate_entry_args(void *opaque, void *key)
{
    const SwsUOp *const elem = key;
    const SwsUOpParams *const p = &elem->par;
    AVBPrint *const out = ((const SwsUOp *) opaque)->data.opaque;

    char label[SWS_UOP_NAME_MAX];
    ff_sws_uop_name(elem, label);

    /* Every entry begins with a line continuation of the enclosing #define */
    av_bprintf(out, " \\\n MACRO(__VA_ARGS__, %-40s, %-13s, %-24s, 0x%x",
               label, pixel_types[elem->type].full, uop_names[elem->uop].full,
               elem->mask);

    /* Only uops that carry parameters get extra arguments */
    switch (elem->uop) {
    case SWS_UOP_LSHIFT:
    case SWS_UOP_RSHIFT:
        av_bprintf(out, ", %u", p->shift.amount);
        break;
    case SWS_UOP_PERMUTE:
    case SWS_UOP_COPY:
        av_bprintf(out, ", %d, %d, %d, %d",
                   p->swizzle.in[0], p->swizzle.in[1],
                   p->swizzle.in[2], p->swizzle.in[3]);
        break;
    case SWS_UOP_PACK:
    case SWS_UOP_UNPACK:
        av_bprintf(out, ", %d, %d, %d, %d",
                   p->pack.pattern[0], p->pack.pattern[1],
                   p->pack.pattern[2], p->pack.pattern[3]);
        break;
    case SWS_UOP_CLEAR:
        av_bprintf(out, ", 0x%05x, 0x%05x", p->clear.one, p->clear.zero);
        break;
    case SWS_UOP_LINEAR:
        av_bprintf(out, ", 0x%05x, 0x%05x", p->lin.one, p->lin.zero);
        break;
    case SWS_UOP_DITHER:
        av_bprintf(out, ", %u, %u, %u, %u, %u",
                   p->dither.y_offset[0], p->dither.y_offset[1],
                   p->dither.y_offset[2], p->dither.y_offset[3],
                   p->dither.size_log2);
        break;
    default:
        break;
    }

    av_bprintf(out, ")");
    return 0;
}
static void uop_uninit(SwsUOp *uop)
{
switch (uop->uop) {
@@ -541,3 +649,192 @@ int ff_sws_ops_translate(const SwsOpList *ops, SwsUOpList *uops)
}
return 0;
}
/**
 * Insert a copy of `uop` into the tree at `root`, unless an equal entry
 * (as judged by ff_sws_uop_cmp_v) is already present.
 *
 * The stored copy has its `data` member zeroed.
 *
 * @return 0 on success (inserted or already present), AVERROR(ENOMEM) if
 *         either allocation fails
 */
static int register_uop(struct AVTreeNode **root, const SwsUOp *uop)
{
    SwsUOp *entry = av_memdup(uop, sizeof(*uop));
    struct AVTreeNode *spare = entry ? av_tree_node_alloc() : NULL;
    if (!spare) {
        av_free(entry);
        return AVERROR(ENOMEM);
    }

    memset(&entry->data, 0, sizeof(entry->data));
    av_tree_insert(root, entry, ff_sws_uop_cmp_v, &spare);
    if (!spare)
        return 0; /* node was consumed: the entry now lives in the tree */

    /* Node left over, so nothing was inserted; drop our copies */
    av_free(spare);
    av_free(entry);
    return 0;
}
/**
 * Compile hook of the dummy backend: translate `ops` into a uop list and
 * register each resulting uop in the tree reached through `ctx->opaque`.
 *
 * `*out` is always set to a zeroed SwsCompiledOp; no real kernel is produced.
 *
 * @return the last result of translation / registration (negative on error)
 */
static int register_uops(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
{
    struct AVTreeNode **root = ctx->opaque;
    SwsUOpList *list = ff_sws_uop_list_alloc();
    if (!list)
        return AVERROR(ENOMEM);

    int ret = ff_sws_ops_translate(ops, list);
    /* Stop at the first failure, whether from translation or registration */
    for (int i = 0; ret >= 0 && i < list->num_ops; i++)
        ret = register_uop(root, &list->ops[i]);

    *out = (SwsCompiledOp) {0}; /* dummy value, will be immediately freed */
    ff_sws_uop_list_free(&list);
    return ret;
}
/**
 * Dummy backend that just registers all seen uops.
 *
 * Its compile hook is register_uops(), which records the uops of each op
 * list it is handed and outputs a zeroed SwsCompiledOp.
 */
static const SwsOpBackend backend_uops = {
.name = "uops_gen",
.compile = register_uops,
};
/**
 * Callback passed to ff_sws_enum_op_lists(): run one op list through the
 * dummy `backend_uops` backend so that its uops get registered.
 *
 * A duplicate of `ops` is compiled because ff_sws_compile_pass() takes over
 * ownership of the list it is given.
 *
 * @return result of ff_sws_compile_pass(), or AVERROR(ENOMEM) if the list
 *         could not be duplicated
 */
static int register_all_uops(SwsContext *ctx, void *graph, SwsOpList *ops)
{
    SwsOpList *dup = ff_sws_op_list_duplicate(ops);
    return dup ? ff_sws_compile_pass(graph, &backend_uops, &dup, 0, NULL, NULL)
               : AVERROR(ENOMEM);
}
/**
 * Flag combinations that ff_sws_uops_macros_gen() enumerates op lists under;
 * each entry is assigned to `ctx->flags` for one full enumeration pass.
 */
static const SwsFlags flags[] = {
0,
/* SWS_ACCURATE_RND may insert extra 1x1 dither ops (for accurate rounding) */
SWS_ACCURATE_RND,
};
/**
 * Range comparator for av_tree_enumerate(): selects only the elements whose
 * pixel type and uop both match the reference key in `opaque`.
 *
 * @return 0 when `elem` matches, otherwise the difference (element minus
 *         reference) of the type, or of the uop when the types are equal
 */
static int enum_type(void *opaque, void *elem)
{
    const SwsUOp *const want = opaque;
    const SwsUOp *const have = elem;

    const int type_diff = (int) have->type - want->type;
    if (type_diff)
        return type_diff;

    /* Same type: the uop difference is zero exactly when they match */
    return (int) have->uop - want->uop;
}
/**
 * av_tree_enumerate() callback that releases one stored uop key.
 *
 * @param opaque unused
 * @param key    tree element to free
 * @return always 0
 */
static int free_uop_key(void *opaque, void *key)
{
    (void) opaque;
    av_free(key);
    return 0;
}
/**
 * Build the text of the generated uops helper-macro header.
 *
 * Every uop seen while enumerating op lists under each entry of `flags` is
 * collected into a tree of unique keys, planar read/write uops are added for
 * all integer pixel types, and one `SWS_FOR_*` / `SWS_FOR_STRUCT_*` macro is
 * then emitted per (pixel type, uop) pair.
 *
 * @param out_str receives the allocated header text on success; the caller
 *                must av_free() it
 * @return 0 on success, a negative error code on failure (including
 *         AVERROR(ENOMEM) when the output buffer could not be grown)
 */
int ff_sws_uops_macros_gen(char **out_str)
{
    int ret;
    struct AVTreeNode *root = NULL;
    AVBPrint bprint, *const bp = &bprint;
    av_bprint_init(bp, 0, AV_BPRINT_SIZE_UNLIMITED);

    /* Allocate dummy graph and context for ff_sws_compile_pass() */
    SwsGraph *graph = ff_sws_graph_alloc();
    if (!graph)
        return AVERROR(ENOMEM);
    SwsContext *ctx = graph->ctx = sws_alloc_context();
    if (!ctx) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    /* Use this to plumb the tree state through all the layers of abstraction */
    ctx->opaque = &root;
    ctx->scaler = SWS_SCALE_BILINEAR; /* cheaper to generate filter kernels */

    /* Register all unique uops over every relevant combination of flags */
    for (int i = 0; i < FF_ARRAY_ELEMS(flags); i++) {
        ctx->flags = flags[i];
        ret = ff_sws_enum_op_lists(ctx, graph, AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
                                   register_all_uops);
        if (ret < 0)
            goto fail;
    }

    /**
     * Additionally make sure planar reads/writes are always available for all
     * formats, because checkasm depends on them to be able to verify the
     * input/output of any other operations.
     */
    for (enum SwsPixelType type = SWS_PIXEL_NONE+1; type < SWS_PIXEL_TYPE_NB; type++) {
        /* Only integer pixel types get the explicit planar entries */
        if (!ff_sws_pixel_type_is_int(type))
            continue;
        for (int elems = 1; elems <= 4; elems++) {
            for (int rw = 0; rw < 2; rw++) {
                SwsUOp uop = {
                    .type = type,
                    .uop = rw ? SWS_UOP_WRITE_PLANAR : SWS_UOP_READ_PLANAR,
                    .mask = SWS_COMP_ELEMS(elems),
                };
                ret = register_uop(&root, &uop);
                if (ret < 0)
                    goto fail;
            }
        }
    }

    /*
     * NOTE(review): the emitted comment below lists the macro arguments as
     * "NAME, UOP, TYPE, MASK", whereas generate_entry_args() prints them as
     * name, pixel type, uop, mask. The text is left untouched here because the
     * output is compared against a committed reference file; fix both together.
     */
#define BPRINT_STR(str) av_bprint_append_data(bp, str, strlen(str))
    BPRINT_STR(
        "/**\n"
        " * This file is automatically generated. Do not edit manually.\n"
        " * To regenerate, run: make fate-sws-uops-macros GEN=1\n"
        " */\n"
        "\n"
        "#ifndef SWSCALE_UOPS_MACROS_H\n"
        "#define SWSCALE_UOPS_MACROS_H\n"
        "\n"
        "/**\n"
        " * Boilerplate helper macros, for template-based backends. These will be\n"
        " * instantiated like this, with parameters in struct order:\n"
        " * MACRO(__VA_ARGS__, NAME, UOP, TYPE, MASK, [PARAMS,])\n"
        " * The _STRUCT variants pass all arguments in C struct syntax, while the\n"
        " * plain variants give them as separate C values (e.g. for use in calls)\n"
        " */\n"
        "#define SWS_GLUE3(x, y, z) x ## _ ## y ## _ ## z\n"
        "#define SWS_FOR(TYPE, UOP, MACRO, ...) \\\n"
        " SWS_GLUE3(SWS_FOR, TYPE, UOP)(MACRO, __VA_ARGS__)\n"
        "#define SWS_FOR_STRUCT(TYPE, UOP, MACRO, ...) \\\n"
        " SWS_GLUE3(SWS_FOR_STRUCT, TYPE, UOP)(MACRO, __VA_ARGS__)\n"
        "\n");

    /* The key doubles as range selector and as carrier of the output buffer */
    SwsUOp key = { .data.opaque = bp };
    for (key.type = SWS_PIXEL_NONE + 1; key.type < SWS_PIXEL_TYPE_NB; key.type++) {
        for (key.uop = SWS_UOP_INVALID + 1; key.uop < SWS_UOP_TYPE_NB; key.uop++) {
            const char *macro = uop_names[key.uop].macro;
            const char *prefix = pixel_types[key.type].prefix;
            av_bprintf(bp, "#define SWS_FOR_%s%s(MACRO, ...)", prefix, macro);
            av_tree_enumerate(root, &key, enum_type, generate_entry_args);
            av_bprintf(bp, "\n");
            av_bprintf(bp, "#define SWS_FOR_STRUCT_%s%s(MACRO, ...)", prefix, macro);
            av_tree_enumerate(root, &key, enum_type, generate_entry_struct);
            av_bprintf(bp, "\n");
        }
    }

    BPRINT_STR("\n#endif /* SWSCALE_UOPS_MACROS_H */");

    /*
     * Appends to an unlimited AVBPrint never report failure directly; if the
     * buffer could not be grown the text is silently truncated. Refuse to
     * hand out a truncated header as if it were complete.
     */
    if (!av_bprint_is_complete(bp)) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    ret = av_bprint_finalize(bp, out_str);

fail:
    /* Also reached on success; finalizing again is harmless there */
    av_bprint_finalize(bp, NULL);
    av_tree_enumerate(root, NULL, NULL, free_uop_key);
    av_tree_destroy(root);
    ff_sws_graph_free(&graph);
    sws_free_context(&ctx);
    return ret;
}
+13
View File
@@ -182,6 +182,7 @@ typedef struct SwsUOp {
SwsPixel scalar;
SwsPixel vec4[4];
SwsPixel mat4[4][5]; /* row major */
void *opaque; /* reserved for internal use */
} data;
} SwsUOp;
@@ -220,4 +221,16 @@ int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop);
*/
int ff_sws_ops_translate(const SwsOpList *ops, SwsUOpList *uops);
/**
 * Generate a set of boilerplate C preprocessor macros for describing and
 * programmatically iterating over all possible SwsUOps.
 *
 * This function can be quite slow as it iterates over every possible
 * combination of pixel formats and flags.
 *
 * @param out_str On success, receives an allocated string holding the
 *                generated macro text; it must be av_free()'d by the caller.
 * @return 0 or a negative error code.
 */
int ff_sws_uops_macros_gen(char **out_str);
#endif
File diff suppressed because it is too large Load Diff
+7
View File
@@ -47,6 +47,13 @@ ifneq ($(HAVE_BIGENDIAN),yes)
FATE_LIBSWSCALE-$(CONFIG_UNSTABLE) += fate-sws-ops-list
fate-sws-ops-list: libswscale/tests/sws_ops$(EXESUF)
fate-sws-ops-list: CMD = run libswscale/tests/sws_ops$(EXESUF) | do_md5sum | cut -d" " -f1
# Disable on bigendian because it would result in a different iteration order
# (and thus output) due to sorting by memcmp() on the parameters struct.
FATE_LIBSWSCALE-$(CONFIG_UNSTABLE) += fate-sws-uops-macros
fate-sws-uops-macros: libswscale/tests/sws_ops$(EXESUF)
fate-sws-uops-macros: REF = $(SRC_PATH)/libswscale/uops_macros.h
fate-sws-uops-macros: CMD = run libswscale/tests/sws_ops$(EXESUF) -macros
endif
FATE_LIBSWSCALE += $(FATE_LIBSWSCALE-yes)
+1
View File
@@ -19,6 +19,7 @@ libswresample/log2_tab.c
libswscale/aarch64/ops_entries.c
libswscale/log2_tab.c
libswscale/riscv/cpu_common.c
libswscale/uops_macros.h
tools/uncoded_frame.c
tools/yuvcmp.c
Headers without standard inclusion guards: