swscale/x86/uops: generate NASM macros using uops_macros.h

Rather than hard-coding a separate set of NASM macros, or generating them
with a separate function, we can just leverage the C preprocessor to generate
a NASM source file *from* the existing ops macros.

This is maybe a bit unorthodox, but it avoids the overhead of generating the
macros twice, avoids having to update the NASM macros by hand, and generally
does not come with any real downside except being a bit ugly.

The main source of ugliness is the fact that the C preprocessor expands
everything into a single line, whereas NASM expects separate statements to
be on separate lines. Very fortunately, we can work around this by writing
another NASM macro to take its arguments and dump them onto multiple lines.

It may seem premature, but I went ahead and defined all the macros, since
it was easy enough to do.

I added the %include in this commit so that any build errors caused by the
generated file show up in the same commit that introduces it.

Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
Niklas Haas
2026-06-09 18:27:20 +02:00
parent 6057759ffc
commit 6deae052a2
6 changed files with 102 additions and 1 deletions
+1
View File
@@ -26,6 +26,7 @@
*.spv
*.spv.c
*.spv.gz
*.gen.asm
*.gen.c
*.gen.S
*.ptx
+1 -1
View File
@@ -254,7 +254,7 @@ $(TOOLOBJS): | tools
OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS))
CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.gen.c *.gen.S *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *~ *.ilk *.pdb
CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.gen.asm *.gen.c *.gen.S *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *~ *.ilk *.pdb
LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
define RULES
+9
View File
@@ -15,9 +15,18 @@ X86ASM-OBJS += x86/input.o \
x86/yuv_2_rgb.o \
x86/yuv2yuvX.o \
# uops_macros.asm.h is input for the C preprocessor that yields NASM source,
# not a regular C header.
# NOTE(review): presumably SKIPHEADERS keeps it out of the standalone header
# checks -- confirm against the common build rules.
SKIPHEADERS += x86/uops_macros.asm.h

ifdef ARCH_X86_64
X86ASM-OBJS-$(CONFIG_UNSTABLE) += x86/ops_common.o x86/ops_int.o \
                                  x86/ops_float.o x86/ops.o

# These objects need the generated macro file to be present and up to date
# before they are built.
$(addprefix $(SUBDIR)x86/,ops_common.o ops_int.o ops_float.o): \
    $(SUBDIR)x86/uops_macros.gen.asm

# Generate the NASM macro file by preprocessing uops_macros.asm.h ($<).
# uops_macros.h is listed as well so that edits to it trigger regeneration.
# NOTE(review): the recipe never names $@ itself; presumably $(CC_E) carries
# the output option -- confirm.
# NOTE(review): $(HOSTCC) is combined with $(CC_E) and $(CPPFLAGS), which are
# presumably the target compiler's settings -- confirm this is intended for
# cross builds.
$(SUBDIR)x86/uops_macros.gen.asm: \
    $(SRC_PATH)/libswscale/x86/uops_macros.asm.h \
    $(SRC_PATH)/libswscale/uops_macros.h
	$(HOSTCC) $(CC_E) $(CPPFLAGS) $<
endif
+1
View File
@@ -19,6 +19,7 @@
;******************************************************************************
%include "libavutil/x86/x86util.asm"
%include "libswscale/x86/uops_macros.gen.asm"
; High-level explanation of how the x86 backend works:
;
+89
View File
@@ -0,0 +1,89 @@
/**
* Copyright (C) 2025 Niklas Haas
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* WARNING: This file is preprocessed by the C compiler to generate the NASM
* source file uops_macros.gen.asm. It must *NOT* include any headers that
* may inadvertently declare any C syntax. The following header only declares
* further macros, and is therefore safe.
*/
#include "../uops_macros.h"
/**
 * NASM expects one statement per source line, but the C preprocessor expands
 * multi-line macros into a single line. This NASM macro allows us to work
 * around that by expanding its arguments into multiple lines.
 *
 * It accepts any number of parameters (0-*) and emits each of them as a line
 * of its own: the loop runs once per parameter (%0), outputs the current
 * first parameter (%1) and then rotates the parameter list left by one.
 * Callers in this file wrap each parameter in braces so that text containing
 * commas is still passed as a single parameter.
 */
%macro MULTILINE 0-*
%rep %0
%1
%rotate 1
%endrep
%endmacro
/*
 * Used to suppress "trailing empty parameter" warnings.
 *
 * Every DECL_OP_MACRO expansion ends in a comma, so the MULTILINE parameter
 * list assembled by DEF_MACRO would otherwise end with an empty parameter.
 * DEF_MACRO appends `dummy` as the final parameter instead; the macro takes
 * no parameters and has an empty body.
 */
%macro dummy 0
%endmacro
/*
 * Callback passed to the SWS_FOR_<TYPE>_<UOP> list macros. It turns one list
 * entry into a brace-wrapped MULTILINE parameter of the form
 * {DECL_OP MACRO <entry arguments>} followed by a comma.
 * MACRO is left untouched by the C preprocessor; it is the parameter of the
 * NASM %define emitted by DEF_MACRO below.
 * NOTE(review): MACRO and the forwarded arguments are separated only by
 * whitespace, not by a comma -- confirm this matches what DECL_OP expects.
 */
#define DECL_OP_MACRO(...) {DECL_OP MACRO __VA_ARGS__},
/*
 * Emits a NASM single-line macro DECL_<TYPE>_<UOP>(MACRO) whose body is a
 * MULTILINE invocation containing one DECL_OP statement per entry of
 * SWS_FOR_<TYPE>_<UOP>, terminated by `dummy` to absorb the trailing comma.
 * UOP and TYPE are only ever token-pasted.
 * The SWS_FOR_* lists are presumably provided by ../uops_macros.h.
 */
#define DEF_MACRO(UOP, TYPE) \
%define DECL_##TYPE##_##UOP(MACRO) \
MULTILINE SWS_FOR_##TYPE##_##UOP(DECL_OP_MACRO) \
dummy
/*
 * Emits the DECL_<TYPE>_<UOP> definition for every uop named below.
 * The C preprocessor collapses the whole expansion onto one line, so each
 * DEF_MACRO result is wrapped in braces and handed to MULTILINE, which puts
 * every resulting %define back on a line of its own.
 * The last entry deliberately has no trailing comma.
 */
#define DEF_ALL_MACROS(TYPE) \
MULTILINE \
{DEF_MACRO(READ_BIT, TYPE)}, \
{DEF_MACRO(READ_NIBBLE, TYPE)}, \
{DEF_MACRO(READ_PACKED, TYPE)}, \
{DEF_MACRO(READ_PLANAR, TYPE)}, \
{DEF_MACRO(READ_PLANAR_FH, TYPE)}, \
{DEF_MACRO(READ_PLANAR_FV, TYPE)}, \
{DEF_MACRO(READ_PLANAR_FV_FMA, TYPE)}, \
{DEF_MACRO(WRITE_BIT, TYPE)}, \
{DEF_MACRO(WRITE_NIBBLE, TYPE)}, \
{DEF_MACRO(WRITE_PACKED, TYPE)}, \
{DEF_MACRO(WRITE_PLANAR, TYPE)}, \
{DEF_MACRO(PERMUTE, TYPE)}, \
{DEF_MACRO(COPY, TYPE)}, \
{DEF_MACRO(SWAP_BYTES, TYPE)}, \
{DEF_MACRO(EXPAND_BIT, TYPE)}, \
{DEF_MACRO(EXPAND_PAIR, TYPE)}, \
{DEF_MACRO(EXPAND_QUAD, TYPE)}, \
{DEF_MACRO(TO_U8, TYPE)}, \
{DEF_MACRO(TO_U16, TYPE)}, \
{DEF_MACRO(TO_U32, TYPE)}, \
{DEF_MACRO(TO_F32, TYPE)}, \
{DEF_MACRO(SCALE, TYPE)}, \
{DEF_MACRO(LINEAR_FMA, TYPE)}, \
{DEF_MACRO(ADD, TYPE)}, \
{DEF_MACRO(MIN, TYPE)}, \
{DEF_MACRO(MAX, TYPE)}, \
{DEF_MACRO(UNPACK, TYPE)}, \
{DEF_MACRO(PACK, TYPE)}, \
{DEF_MACRO(LSHIFT, TYPE)}, \
{DEF_MACRO(RSHIFT, TYPE)}, \
{DEF_MACRO(CLEAR, TYPE)}, \
{DEF_MACRO(DITHER, TYPE)}
/* Instantiate the full macro set once per type suffix: U8, U16, U32, F32. */
DEF_ALL_MACROS(U8)
DEF_ALL_MACROS(U16)
DEF_ALL_MACROS(U32)
DEF_ALL_MACROS(F32)
+1
View File
@@ -28,6 +28,7 @@ compat/float/float.h
compat/float/limits.h
compat/stdbit/stdbit.h
libavcodec/bitstream_template.h
libswscale/x86/uops_macros.asm.h
tools/decode_simple.h
Use of av_clip() where av_clip_uintp2() could be used:
Use of av_clip() where av_clip_intp2() could be used: