From 6deae052a232194f6794936196fd9783d09f9784 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Tue, 7 Apr 2026 02:58:39 +0200 Subject: [PATCH] swscale/x86/uops: generate NASM macros using uops_macros.h Rather than hard-coding a separate set of NASM macros, or generating them with a separate function, we can just leverage the C preprocessor to generate a NASM source file *from* the existing ops macros. This is maybe a bit unorthodox, but it avoids unnecessary overhead from re-generating the macros twice, avoids manual updating of the NASM macros, and generally does not come with any real downside except being a bit ugly. The main source of ugliness is the fact that the C preprocessor expands everything into a single line, whereas NASM expects separate statements to be on separate lines. Very fortunately, we can work around this by writing another NASM macro to take its arguments and dump them onto multiple lines. It may seem premature, but I went ahead and defined all the macros, since it was easy enough to do. I added the %include in this commit so that any build errors caused by introducing this file show up in the same commit that introduces it. 
Signed-off-by: Niklas Haas --- .gitignore | 1 + ffbuild/common.mak | 2 +- libswscale/x86/Makefile | 9 ++++ libswscale/x86/ops_include.asm | 1 + libswscale/x86/uops_macros.asm.h | 89 ++++++++++++++++++++++++++++++++ tests/ref/fate/source | 1 + 6 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 libswscale/x86/uops_macros.asm.h diff --git a/.gitignore b/.gitignore index 9fc61027d2..6e7d5f2ce7 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ *.spv *.spv.c *.spv.gz +*.gen.asm *.gen.c *.gen.S *.ptx diff --git a/ffbuild/common.mak b/ffbuild/common.mak index 29873ad7d4..059de5abf3 100644 --- a/ffbuild/common.mak +++ b/ffbuild/common.mak @@ -254,7 +254,7 @@ $(TOOLOBJS): | tools OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS)) -CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.gen.c *.gen.S *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *~ *.ilk *.pdb +CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.gen.asm *.gen.c *.gen.S *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *~ *.ilk *.pdb LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a define RULES diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index 906d1a43f6..da0cb98a9f 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -15,9 +15,18 @@ X86ASM-OBJS += x86/input.o \ x86/yuv_2_rgb.o \ x86/yuv2yuvX.o \ +SKIPHEADERS += x86/uops_macros.asm.h + ifdef ARCH_X86_64 X86ASM-OBJS-$(CONFIG_UNSTABLE) += x86/ops_common.o \ x86/ops_int.o \ x86/ops_float.o \ x86/ops.o + +$(SUBDIR)x86/ops_common.o: $(SUBDIR)x86/uops_macros.gen.asm +$(SUBDIR)x86/ops_int.o: $(SUBDIR)x86/uops_macros.gen.asm +$(SUBDIR)x86/ops_float.o: $(SUBDIR)x86/uops_macros.gen.asm +$(SUBDIR)x86/uops_macros.gen.asm: $(SRC_PATH)/libswscale/x86/uops_macros.asm.h \ + 
$(SRC_PATH)/libswscale/uops_macros.h + $(HOSTCC) $(CC_E) $(CPPFLAGS) $< endif diff --git a/libswscale/x86/ops_include.asm b/libswscale/x86/ops_include.asm index 4b4ece51c9..d0b45b5f9e 100644 --- a/libswscale/x86/ops_include.asm +++ b/libswscale/x86/ops_include.asm @@ -19,6 +19,7 @@ ;****************************************************************************** %include "libavutil/x86/x86util.asm" +%include "libswscale/x86/uops_macros.gen.asm" ; High-level explanation of how the x86 backend works: ; diff --git a/libswscale/x86/uops_macros.asm.h b/libswscale/x86/uops_macros.asm.h new file mode 100644 index 0000000000..544db36c26 --- /dev/null +++ b/libswscale/x86/uops_macros.asm.h @@ -0,0 +1,89 @@ +/** + * Copyright (C) 2025 Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * WARNING: This file is preprocessed by the C compiler to generate the NASM + * source file uops_macros.gen.asm. It must *NOT* include any headers that + * may inadvertently declare any C syntax. The following header only declares + * further macros, and is therefore safe. + */ +#include "../uops_macros.h" + +/** + * NASM expects one statement per source line, but the C preprocessor expands + * multi-line macros into a single line. 
This NASM macro allows us to work + * around that by expanding its arguments into multiple lines. + */ +%macro MULTILINE 0-* + %rep %0 + %1 + %rotate 1 + %endrep +%endmacro + +/* Used to suppress "trailing empty parameter" warnings */ +%macro dummy 0 +%endmacro + +#define DECL_OP_MACRO(...) {DECL_OP MACRO __VA_ARGS__}, +#define DEF_MACRO(UOP, TYPE) \ + %define DECL_##TYPE##_##UOP(MACRO) \ + MULTILINE SWS_FOR_##TYPE##_##UOP(DECL_OP_MACRO) \ + dummy + +#define DEF_ALL_MACROS(TYPE) \ + MULTILINE \ + {DEF_MACRO(READ_BIT, TYPE)}, \ + {DEF_MACRO(READ_NIBBLE, TYPE)}, \ + {DEF_MACRO(READ_PACKED, TYPE)}, \ + {DEF_MACRO(READ_PLANAR, TYPE)}, \ + {DEF_MACRO(READ_PLANAR_FH, TYPE)}, \ + {DEF_MACRO(READ_PLANAR_FV, TYPE)}, \ + {DEF_MACRO(READ_PLANAR_FV_FMA, TYPE)}, \ + {DEF_MACRO(WRITE_BIT, TYPE)}, \ + {DEF_MACRO(WRITE_NIBBLE, TYPE)}, \ + {DEF_MACRO(WRITE_PACKED, TYPE)}, \ + {DEF_MACRO(WRITE_PLANAR, TYPE)}, \ + {DEF_MACRO(PERMUTE, TYPE)}, \ + {DEF_MACRO(COPY, TYPE)}, \ + {DEF_MACRO(SWAP_BYTES, TYPE)}, \ + {DEF_MACRO(EXPAND_BIT, TYPE)}, \ + {DEF_MACRO(EXPAND_PAIR, TYPE)}, \ + {DEF_MACRO(EXPAND_QUAD, TYPE)}, \ + {DEF_MACRO(TO_U8, TYPE)}, \ + {DEF_MACRO(TO_U16, TYPE)}, \ + {DEF_MACRO(TO_U32, TYPE)}, \ + {DEF_MACRO(TO_F32, TYPE)}, \ + {DEF_MACRO(SCALE, TYPE)}, \ + {DEF_MACRO(LINEAR_FMA, TYPE)}, \ + {DEF_MACRO(ADD, TYPE)}, \ + {DEF_MACRO(MIN, TYPE)}, \ + {DEF_MACRO(MAX, TYPE)}, \ + {DEF_MACRO(UNPACK, TYPE)}, \ + {DEF_MACRO(PACK, TYPE)}, \ + {DEF_MACRO(LSHIFT, TYPE)}, \ + {DEF_MACRO(RSHIFT, TYPE)}, \ + {DEF_MACRO(CLEAR, TYPE)}, \ + {DEF_MACRO(DITHER, TYPE)} + +DEF_ALL_MACROS(U8) +DEF_ALL_MACROS(U16) +DEF_ALL_MACROS(U32) +DEF_ALL_MACROS(F32) diff --git a/tests/ref/fate/source b/tests/ref/fate/source index e40e8f0b86..d5eb31d887 100644 --- a/tests/ref/fate/source +++ b/tests/ref/fate/source @@ -28,6 +28,7 @@ compat/float/float.h compat/float/limits.h compat/stdbit/stdbit.h libavcodec/bitstream_template.h +libswscale/x86/uops_macros.asm.h tools/decode_simple.h Use of av_clip() where 
av_clip_uintp2() could be used: Use of av_clip() where av_clip_intp2() could be used: