mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-11 08:13:06 +00:00
swscale/uops: generate uop helper macros
This follows the same approach as is used currently by ops_entries_aarch64, except I decided to have the generation logic live directly in uops.c to allow re-using internal helpers and move it closer to the other helpers that depend on the exact set of uops and their fields. Unlike libswscale/tests/sws_ops.c, we make an effort to actually test all relevant flag combinations, since these can affect the generated op lists. I will use these macros to auto-generate both the C template-based kernels, as well as the entire x86 backend, in the near future, hence their excessive flexibility. Re-use the libswscale/tests/sws_ops.c that we already compile. We could put it in its own file but this is just as convenient, and it's easily moved anyways. Having it be a FATE test ensures that it is always up-to-date. Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
@@ -18,6 +18,7 @@
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/mem.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "libswscale/ops.h"
|
||||
#include "libswscale/ops_dispatch.h"
|
||||
@@ -109,6 +110,7 @@ int main(int argc, char **argv)
|
||||
enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE;
|
||||
SwsContext *ctx = NULL;
|
||||
SwsGraph *graph = NULL;
|
||||
bool macros_gen = false;
|
||||
int ret = 1;
|
||||
|
||||
#ifdef _WIN32
|
||||
@@ -127,6 +129,8 @@ int main(int argc, char **argv)
|
||||
" Only test the specified source pixel format\n"
|
||||
" -v <level>\n"
|
||||
" Enable log verbosity at given level\n"
|
||||
" -macros\n"
|
||||
" Generate helper macros\n"
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
@@ -153,6 +157,8 @@ int main(int argc, char **argv)
|
||||
goto bad_option;
|
||||
av_log_set_level(atoi(argv[i + 1]));
|
||||
i++;
|
||||
} else if (!strcmp(argv[i], "-macros")) {
|
||||
macros_gen = true;
|
||||
} else {
|
||||
bad_option:
|
||||
fprintf(stderr, "bad option or argument missing (%s) see -help\n", argv[i]);
|
||||
@@ -160,6 +166,14 @@ bad_option:
|
||||
}
|
||||
}
|
||||
|
||||
if (macros_gen) {
|
||||
char *macros = NULL;
|
||||
ret = ff_sws_uops_macros_gen(&macros);
|
||||
if (ret >= 0)
|
||||
puts(macros);
|
||||
av_free(macros);
|
||||
return ret;
|
||||
}
|
||||
/* Allocate dummy graph and context for ff_sws_compile_pass() */
|
||||
graph = ff_sws_graph_alloc();
|
||||
if (!graph)
|
||||
|
||||
+298
-1
@@ -23,6 +23,7 @@
|
||||
#include "libavutil/avassert.h"
|
||||
#include "libavutil/mem.h"
|
||||
#include "libavutil/refstruct.h"
|
||||
#include "libavutil/tree.h"
|
||||
|
||||
#include "ops.h"
|
||||
#include "ops_internal.h"
|
||||
@@ -42,8 +43,9 @@ int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b)
|
||||
static const struct {
|
||||
char full[24];
|
||||
char abbr[16];
|
||||
char macro[16];
|
||||
} uop_names[SWS_UOP_TYPE_NB] = {
|
||||
#define UOP_NAME(OP, ABBR) [SWS_UOP_##OP] = { "SWS_UOP_" #OP, ABBR }
|
||||
#define UOP_NAME(OP, ABBR) [SWS_UOP_##OP] = { "SWS_UOP_" #OP, ABBR, #OP }
|
||||
UOP_NAME(INVALID, "invalid"),
|
||||
UOP_NAME(READ_PLANAR, "read_planar"),
|
||||
UOP_NAME(READ_PLANAR_FH, "read_planar_fh"),
|
||||
@@ -79,6 +81,17 @@ static const struct {
|
||||
#undef UOP_NAME
|
||||
};
|
||||
|
||||
static const struct {
|
||||
char full[16];
|
||||
char prefix[8];
|
||||
} pixel_types[SWS_PIXEL_TYPE_NB] = {
|
||||
[SWS_PIXEL_NONE] = { "SWS_PIXEL_NONE", "" },
|
||||
[SWS_PIXEL_U8] = { "SWS_PIXEL_U8", "U8_" },
|
||||
[SWS_PIXEL_U16] = { "SWS_PIXEL_U16", "U16_" },
|
||||
[SWS_PIXEL_U32] = { "SWS_PIXEL_U32", "U32_" },
|
||||
[SWS_PIXEL_F32] = { "SWS_PIXEL_F32", "F32_" },
|
||||
};
|
||||
|
||||
static SwsPixel pixel_from_q(SwsPixelType type, AVRational val)
|
||||
{
|
||||
av_assert1(val.den != 0);
|
||||
@@ -188,6 +201,101 @@ void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
|
||||
av_assert0(av_bprint_is_complete(&bp));
|
||||
}
|
||||
|
||||
static int generate_entry_struct(void *opaque, void *key)
|
||||
{
|
||||
const SwsUOp *ref = opaque;
|
||||
const SwsUOp *uop = key;
|
||||
AVBPrint *bp = ref->data.opaque;
|
||||
char name[SWS_UOP_NAME_MAX];
|
||||
ff_sws_uop_name(uop, name);
|
||||
av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s", name);
|
||||
av_bprintf(bp, ", .type = %-13s, .uop = %-24s, .mask = 0x%x",
|
||||
pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask);
|
||||
|
||||
const SwsUOpParams *par = &uop->par;
|
||||
switch (uop->uop) {
|
||||
case SWS_UOP_LSHIFT:
|
||||
case SWS_UOP_RSHIFT:
|
||||
av_bprintf(bp, ", .par.shift.amount = %u", par->shift.amount);
|
||||
break;
|
||||
case SWS_UOP_PERMUTE:
|
||||
case SWS_UOP_COPY:
|
||||
av_bprintf(bp, ", .par.swizzle.in = {%d, %d, %d, %d}",
|
||||
par->swizzle.in[0], par->swizzle.in[1],
|
||||
par->swizzle.in[2], par->swizzle.in[3]);
|
||||
break;
|
||||
case SWS_UOP_PACK:
|
||||
case SWS_UOP_UNPACK:
|
||||
av_bprintf(bp, ", .par.pack.pattern = {%d, %d, %d, %d}",
|
||||
par->pack.pattern[0], par->pack.pattern[1],
|
||||
par->pack.pattern[2], par->pack.pattern[3]);
|
||||
break;
|
||||
case SWS_UOP_CLEAR:
|
||||
av_bprintf(bp, ", .par.clear.one = 0x%x, .par.clear.zero = 0x%x",
|
||||
par->clear.one, par->clear.zero);
|
||||
break;
|
||||
case SWS_UOP_LINEAR:
|
||||
av_bprintf(bp, ", .par.lin.one = 0x%x, .par.lin.zero = 0x%x",
|
||||
par->lin.one, par->lin.zero);
|
||||
break;
|
||||
case SWS_UOP_DITHER:
|
||||
av_bprintf(bp, ", .par.dither = { .y_offset = {%u, %u, %u, %u}, .size_log2 = %u }",
|
||||
par->dither.y_offset[0], par->dither.y_offset[1],
|
||||
par->dither.y_offset[2], par->dither.y_offset[3],
|
||||
par->dither.size_log2);
|
||||
break;
|
||||
}
|
||||
|
||||
av_bprintf(bp, ")");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int generate_entry_args(void *opaque, void *key)
|
||||
{
|
||||
const SwsUOp *ref = opaque;
|
||||
const SwsUOp *uop = key;
|
||||
AVBPrint *bp = ref->data.opaque;
|
||||
char name[SWS_UOP_NAME_MAX];
|
||||
ff_sws_uop_name(uop, name);
|
||||
av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s, %-13s, %-24s, 0x%x",
|
||||
name, pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask);
|
||||
|
||||
const SwsUOpParams *par = &uop->par;
|
||||
switch (uop->uop) {
|
||||
case SWS_UOP_LSHIFT:
|
||||
case SWS_UOP_RSHIFT:
|
||||
av_bprintf(bp, ", %u", par->shift.amount);
|
||||
break;
|
||||
case SWS_UOP_PERMUTE:
|
||||
case SWS_UOP_COPY:
|
||||
av_bprintf(bp, ", %d, %d, %d, %d",
|
||||
par->swizzle.in[0], par->swizzle.in[1],
|
||||
par->swizzle.in[2], par->swizzle.in[3]);
|
||||
break;
|
||||
case SWS_UOP_PACK:
|
||||
case SWS_UOP_UNPACK:
|
||||
av_bprintf(bp, ", %d, %d, %d, %d",
|
||||
par->pack.pattern[0], par->pack.pattern[1],
|
||||
par->pack.pattern[2], par->pack.pattern[3]);
|
||||
break;
|
||||
case SWS_UOP_CLEAR:
|
||||
av_bprintf(bp, ", 0x%05x, 0x%05x", par->clear.one, par->clear.zero);
|
||||
break;
|
||||
case SWS_UOP_LINEAR:
|
||||
av_bprintf(bp, ", 0x%05x, 0x%05x", par->lin.one, par->lin.zero);
|
||||
break;
|
||||
case SWS_UOP_DITHER:
|
||||
av_bprintf(bp, ", %u, %u, %u, %u, %u",
|
||||
par->dither.y_offset[0], par->dither.y_offset[1],
|
||||
par->dither.y_offset[2], par->dither.y_offset[3],
|
||||
par->dither.size_log2);
|
||||
break;
|
||||
}
|
||||
|
||||
av_bprintf(bp, ")");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void uop_uninit(SwsUOp *uop)
|
||||
{
|
||||
switch (uop->uop) {
|
||||
@@ -541,3 +649,192 @@ int ff_sws_ops_translate(const SwsOpList *ops, SwsUOpList *uops)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int register_uop(struct AVTreeNode **root, const SwsUOp *uop)
|
||||
{
|
||||
SwsUOp *key = av_memdup(uop, sizeof(*uop));
|
||||
if (!key)
|
||||
return AVERROR(ENOMEM);
|
||||
memset(&key->data, 0, sizeof(key->data));
|
||||
|
||||
struct AVTreeNode *node = av_tree_node_alloc();
|
||||
if (!node) {
|
||||
av_free(key);
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
av_tree_insert(root, key, ff_sws_uop_cmp_v, &node);
|
||||
if (node) {
|
||||
av_free(node);
|
||||
av_free(key);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * SwsOpBackend compile callback: translate `ops` into uops and record each
 * of them in the tree whose root pointer is stored in ctx->opaque.
 *
 * `*out` is always set to a zeroed SwsCompiledOp, on success and on failure.
 *
 * @return the result of the last step performed; negative on error
 */
static int register_uops(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
{
    SwsUOpList *list = ff_sws_uop_list_alloc();
    if (!list)
        return AVERROR(ENOMEM);

    int ret = ff_sws_ops_translate(ops, list);
    if (ret >= 0) {
        struct AVTreeNode **root = ctx->opaque;
        /* Stop at the first uop that fails to register */
        for (int i = 0; ret >= 0 && i < list->num_ops; i++)
            ret = register_uop(root, &list->ops[i]);
    }

    *out = (SwsCompiledOp) {0}; /* dummy value, will be immediately freed */
    ff_sws_uop_list_free(&list);
    return ret;
}
|
||||
|
||||
/* Dummy backend that just registers all seen uops */
|
||||
static const SwsOpBackend backend_uops = {
|
||||
.name = "uops_gen",
|
||||
.compile = register_uops,
|
||||
};
|
||||
|
||||
/**
 * Callback for ff_sws_enum_op_lists(): run one op list through the
 * uop-registering backend.
 *
 * @param ctx   unused here
 * @param graph graph handed to ff_sws_compile_pass()
 * @param ops   op list to register; it is duplicated, not consumed
 * @return AVERROR(ENOMEM) if the duplicate cannot be allocated, otherwise
 *         the result of ff_sws_compile_pass()
 */
static int register_all_uops(SwsContext *ctx, void *graph, SwsOpList *ops)
{
    /* ff_sws_compile_pass() takes over ownership of `ops` */
    SwsOpList *copy = ff_sws_op_list_duplicate(ops);
    if (!copy)
        return AVERROR(ENOMEM);

    /* Pass the address of the duplicate so the callee can take it over */
    return ff_sws_compile_pass(graph, &backend_uops, &copy, 0, NULL, NULL);
}
|
||||
|
||||
static const SwsFlags flags[] = {
|
||||
0,
|
||||
|
||||
/* SWS_ACCURATE_RND may insert extra 1x1 dither ops (for accurate rounding) */
|
||||
SWS_ACCURATE_RND,
|
||||
};
|
||||
|
||||
/* Limit the range of av_tree_enumerate() to only matching uop and type */
|
||||
static int enum_type(void *opaque, void *elem)
|
||||
{
|
||||
const SwsUOp *a = opaque, *b = elem;
|
||||
if (a->type != b->type)
|
||||
return (int) b->type - a->type;
|
||||
if (a->uop != b->uop)
|
||||
return (int) b->uop - a->uop;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Tree enumeration callback that releases a key stored in the tree.
 * `opaque` is not used.
 *
 * @return always 0
 */
static int free_uop_key(void *opaque, void *key)
{
    (void) opaque;
    av_free(key);
    return 0;
}
|
||||
|
||||
int ff_sws_uops_macros_gen(char **out_str)
|
||||
{
|
||||
int ret;
|
||||
struct AVTreeNode *root = NULL;
|
||||
|
||||
AVBPrint bprint, *const bp = &bprint;
|
||||
av_bprint_init(bp, 0, AV_BPRINT_SIZE_UNLIMITED);
|
||||
|
||||
/* Allocate dummy graph and context for ff_sws_compile_pass() */
|
||||
SwsGraph *graph = ff_sws_graph_alloc();
|
||||
if (!graph)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
SwsContext *ctx = graph->ctx = sws_alloc_context();
|
||||
if (!ctx) {
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Use this to plumb the tree state through all the layers of abstraction */
|
||||
ctx->opaque = &root;
|
||||
ctx->scaler = SWS_SCALE_BILINEAR; /* cheaper to generate filter kernels */
|
||||
|
||||
/* Register all unique uops over every relevant combination of flags */
|
||||
for (int i = 0; i < FF_ARRAY_ELEMS(flags); i++) {
|
||||
ctx->flags = flags[i];
|
||||
ret = ff_sws_enum_op_lists(ctx, graph, AV_PIX_FMT_NONE, AV_PIX_FMT_NONE,
|
||||
register_all_uops);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/**
|
||||
* Additionally make sure planar reads/writes are always available for all
|
||||
* formats, because checkasm depends on them to be able to verify the
|
||||
* input/output of any other operations.
|
||||
*/
|
||||
for (enum SwsPixelType type = SWS_PIXEL_NONE+1; type < SWS_PIXEL_TYPE_NB; type++) {
|
||||
if (!ff_sws_pixel_type_is_int(type))
|
||||
continue;
|
||||
for (int elems = 1; elems <= 4; elems++) {
|
||||
for (int rw = 0; rw < 2; rw++) {
|
||||
SwsUOp uop = {
|
||||
.type = type,
|
||||
.uop = rw ? SWS_UOP_WRITE_PLANAR : SWS_UOP_READ_PLANAR,
|
||||
.mask = SWS_COMP_ELEMS(elems),
|
||||
};
|
||||
|
||||
ret = register_uop(&root, &uop);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define BPRINT_STR(str) av_bprint_append_data(bp, str, strlen(str))
|
||||
BPRINT_STR(
|
||||
"/**\n"
|
||||
" * This file is automatically generated. Do not edit manually.\n"
|
||||
" * To regenerate, run: make fate-sws-uops-macros GEN=1\n"
|
||||
" */\n"
|
||||
"\n"
|
||||
"#ifndef SWSCALE_UOPS_MACROS_H\n"
|
||||
"#define SWSCALE_UOPS_MACROS_H\n"
|
||||
"\n"
|
||||
"/**\n"
|
||||
" * Boilerplate helper macros, for template-based backends. These will be\n"
|
||||
" * instantiated like this, with parameters in struct order:\n"
|
||||
" * MACRO(__VA_ARGS__, NAME, UOP, TYPE, MASK, [PARAMS,])\n"
|
||||
" * The _STRUCT variants pass all arguments in C struct syntax, while the\n"
|
||||
" * plain variants give them as separate C values (e.g. for use in calls)\n"
|
||||
" */\n"
|
||||
"#define SWS_GLUE3(x, y, z) x ## _ ## y ## _ ## z\n"
|
||||
"#define SWS_FOR(TYPE, UOP, MACRO, ...) \\\n"
|
||||
" SWS_GLUE3(SWS_FOR, TYPE, UOP)(MACRO, __VA_ARGS__)\n"
|
||||
"#define SWS_FOR_STRUCT(TYPE, UOP, MACRO, ...) \\\n"
|
||||
" SWS_GLUE3(SWS_FOR_STRUCT, TYPE, UOP)(MACRO, __VA_ARGS__)\n"
|
||||
"\n");
|
||||
|
||||
SwsUOp key = { .data.opaque = bp };
|
||||
for (key.type = SWS_PIXEL_NONE + 1; key.type < SWS_PIXEL_TYPE_NB; key.type++) {
|
||||
for (key.uop = SWS_UOP_INVALID + 1; key.uop < SWS_UOP_TYPE_NB; key.uop++) {
|
||||
const char *macro = uop_names[key.uop].macro;
|
||||
const char *prefix = pixel_types[key.type].prefix;
|
||||
av_bprintf(bp, "#define SWS_FOR_%s%s(MACRO, ...)", prefix, macro);
|
||||
av_tree_enumerate(root, &key, enum_type, generate_entry_args);
|
||||
av_bprintf(bp, "\n");
|
||||
av_bprintf(bp, "#define SWS_FOR_STRUCT_%s%s(MACRO, ...)", prefix, macro);
|
||||
av_tree_enumerate(root, &key, enum_type, generate_entry_struct);
|
||||
av_bprintf(bp, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
BPRINT_STR("\n#endif /* SWSCALE_UOPS_MACROS_H */");
|
||||
ret = av_bprint_finalize(bp, out_str);
|
||||
|
||||
fail:
|
||||
av_bprint_finalize(bp, NULL);
|
||||
av_tree_enumerate(root, NULL, NULL, free_uop_key);
|
||||
av_tree_destroy(root);
|
||||
ff_sws_graph_free(&graph);
|
||||
sws_free_context(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -182,6 +182,7 @@ typedef struct SwsUOp {
|
||||
SwsPixel scalar;
|
||||
SwsPixel vec4[4];
|
||||
SwsPixel mat4[4][5]; /* row major */
|
||||
void *opaque; /* reserved for internal use */
|
||||
} data;
|
||||
} SwsUOp;
|
||||
|
||||
@@ -220,4 +221,16 @@ int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop);
|
||||
*/
|
||||
int ff_sws_ops_translate(const SwsOpList *ops, SwsUOpList *uops);
|
||||
|
||||
/**
|
||||
* Generate a set of boilerplate C preprocessor macros for describing and
|
||||
* programmatically iterating over all possible SwsUOps.
|
||||
*
|
||||
* This function can be quite slow as it iterates over every possible
|
||||
* combination of pixel formats and flags.
|
||||
*
|
||||
* Returns 0 or a negative error code. On success, an allocated string is
|
||||
* returned via `out_str`, and must be av_free()'d by the caller.
|
||||
*/
|
||||
int ff_sws_uops_macros_gen(char **out_str);
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -47,6 +47,13 @@ ifneq ($(HAVE_BIGENDIAN),yes)
|
||||
FATE_LIBSWSCALE-$(CONFIG_UNSTABLE) += fate-sws-ops-list
|
||||
fate-sws-ops-list: libswscale/tests/sws_ops$(EXESUF)
|
||||
fate-sws-ops-list: CMD = run libswscale/tests/sws_ops$(EXESUF) | do_md5sum | cut -d" " -f1
|
||||
|
||||
# Disable on bigendian because it would result in a different iteration order
|
||||
# (and thus output) due to sorting by memcmp() on the parameters struct.
|
||||
FATE_LIBSWSCALE-$(CONFIG_UNSTABLE) += fate-sws-uops-macros
|
||||
fate-sws-uops-macros: libswscale/tests/sws_ops$(EXESUF)
|
||||
fate-sws-uops-macros: REF = $(SRC_PATH)/libswscale/uops_macros.h
|
||||
fate-sws-uops-macros: CMD = run libswscale/tests/sws_ops$(EXESUF) -macros
|
||||
endif
|
||||
|
||||
FATE_LIBSWSCALE += $(FATE_LIBSWSCALE-yes)
|
||||
|
||||
@@ -19,6 +19,7 @@ libswresample/log2_tab.c
|
||||
libswscale/aarch64/ops_entries.c
|
||||
libswscale/log2_tab.c
|
||||
libswscale/riscv/cpu_common.c
|
||||
libswscale/uops_macros.h
|
||||
tools/uncoded_frame.c
|
||||
tools/yuvcmp.c
|
||||
Headers without standard inclusion guards:
|
||||
|
||||
Reference in New Issue
Block a user