mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-11 08:13:06 +00:00
swscale/x86/uops: generate NASM macros using uops_macros.h
Rather than hard-coding a separate set of NASM macros, or generating them with a separate function, we can just leverage the C preprocessor to generate a NASM source file *from* the existing ops macros. This is maybe a bit unorthodox, but it avoids unnecessary overhead from generating the macros twice, avoids manual updating of the NASM macros, and generally does not come with any real downside except being a bit ugly. The main source of ugliness is the fact that the C preprocessor expands everything into a single line, whereas NASM expects separate statements to be on separate lines. Very fortunately, we can work around this by writing another NASM macro that takes its arguments and dumps them onto multiple lines. It may seem premature, but I went ahead and defined all the macros, since it was easy enough to do. I added the %include in this commit so that any build errors caused by introducing this file show up in the same commit that introduces it. Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
@@ -26,6 +26,7 @@
|
||||
*.spv
|
||||
*.spv.c
|
||||
*.spv.gz
|
||||
*.gen.asm
|
||||
*.gen.c
|
||||
*.gen.S
|
||||
*.ptx
|
||||
|
||||
+1
-1
@@ -254,7 +254,7 @@ $(TOOLOBJS): | tools
|
||||
|
||||
OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS))
|
||||
|
||||
CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.gen.c *.gen.S *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *~ *.ilk *.pdb
|
||||
CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.gen.asm *.gen.c *.gen.S *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *~ *.ilk *.pdb
|
||||
LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
|
||||
|
||||
define RULES
|
||||
|
||||
@@ -15,9 +15,18 @@ X86ASM-OBJS += x86/input.o \
|
||||
x86/yuv_2_rgb.o \
|
||||
x86/yuv2yuvX.o \
|
||||
|
||||
SKIPHEADERS += x86/uops_macros.asm.h
|
||||
|
||||
# Everything in this section is only parsed when ARCH_X86_64 is defined.
ifdef ARCH_X86_64
|
||||
# Objects appended to the X86ASM-OBJS-<value> list, where <value> is whatever
# CONFIG_UNSTABLE expands to.
X86ASM-OBJS-$(CONFIG_UNSTABLE) += x86/ops_common.o \
|
||||
x86/ops_int.o \
|
||||
x86/ops_float.o \
|
||||
x86/ops.o
|
||||
|
||||
# These objects list the generated NASM macro file as a prerequisite, so make
# (re)creates it before building them.
$(SUBDIR)x86/ops_common.o: $(SUBDIR)x86/uops_macros.gen.asm
|
||||
$(SUBDIR)x86/ops_int.o: $(SUBDIR)x86/uops_macros.gen.asm
|
||||
$(SUBDIR)x86/ops_float.o: $(SUBDIR)x86/uops_macros.gen.asm
|
||||
# Generate uops_macros.gen.asm from uops_macros.asm.h. Only the first
# prerequisite ($<, i.e. uops_macros.asm.h) is passed to the compiler;
# uops_macros.h is listed so that editing it also triggers regeneration.
# NOTE(review): CC_E is defined elsewhere; presumably it selects
# preprocess-only mode and names $@ as the output file -- confirm.
$(SUBDIR)x86/uops_macros.gen.asm: $(SRC_PATH)/libswscale/x86/uops_macros.asm.h \
|
||||
$(SRC_PATH)/libswscale/uops_macros.h
|
||||
$(HOSTCC) $(CC_E) $(CPPFLAGS) $<
|
||||
endif
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
%include "libswscale/x86/uops_macros.gen.asm"
|
||||
|
||||
; High-level explanation of how the x86 backend works:
|
||||
;
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
/**
|
||||
* Copyright (C) 2025 Niklas Haas
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* WARNING: This file is preprocessed by the C compiler to generate the NASM
|
||||
* source file uops_macros.gen.asm. It must *NOT* include any headers that
|
||||
* may inadvertently declare any C syntax. The following header only declares
|
||||
* further macros, and is therefore safe.
|
||||
*/
|
||||
#include "../uops_macros.h"
|
||||
|
||||
/**
|
||||
* NASM expects one statement per source line, but the C preprocessor expands
|
||||
* multi-line macros into a single line. This NASM macro allows us to work
|
||||
* around that by expanding its arguments into multiple lines.
*
* It accepts any number of parameters (0-*). The %rep loop runs once per
* parameter (%0 is the parameter count): each iteration emits the current
* first parameter (%1) on a line of its own and then rotates the parameter
* list by one position, so every parameter is emitted exactly once, in the
* order given.
|
||||
*/
|
||||
%macro MULTILINE 0-*
|
||||
%rep %0
|
||||
%1
|
||||
%rotate 1
|
||||
%endrep
|
||||
%endmacro
|
||||
|
||||
/*
 * Empty NASM macro that takes no parameters and expands to nothing.
 * Used to suppress "trailing empty parameter" warnings: DEF_MACRO appends it
 * after the final comma of the argument list it passes to MULTILINE, so the
 * last parameter is never empty.
 */
|
||||
%macro dummy 0
|
||||
%endmacro
|
||||
|
||||
/**
 * DECL_OP_MACRO(...) wraps its arguments into a single brace-delimited item
 * of the form {DECL_OP MACRO <args>}, followed by a comma. The token MACRO is
 * not a parameter of this C macro; it matches the parameter name of the NASM
 * %define emitted by DEF_MACRO.
 */
#define DECL_OP_MACRO(...) {DECL_OP MACRO __VA_ARGS__},
|
||||
/**
 * DEF_MACRO(UOP, TYPE) emits a NASM single-line macro named
 * DECL_<TYPE>_<UOP>(MACRO). Its body is a MULTILINE invocation whose
 * arguments are produced by passing DECL_OP_MACRO to SWS_FOR_<TYPE>_<UOP>,
 * followed by a final `dummy` argument after the last comma.
 *
 * NOTE(review): SWS_FOR_<TYPE>_<UOP> is not defined in this file; presumably
 * it comes from ../uops_macros.h and applies its argument once per op
 * variant -- confirm.
 */
#define DEF_MACRO(UOP, TYPE) \
|
||||
%define DECL_##TYPE##_##UOP(MACRO) \
|
||||
MULTILINE SWS_FOR_##TYPE##_##UOP(DECL_OP_MACRO) \
|
||||
dummy
|
||||
|
||||
/**
 * DEF_ALL_MACROS(TYPE) expands DEF_MACRO once for every op name listed below,
 * all with the same TYPE. Each expansion is wrapped in braces and passed as a
 * separate argument to a single MULTILINE invocation, so that each resulting
 * %define ends up on a line of its own in the generated NASM source.
 */
#define DEF_ALL_MACROS(TYPE) \
|
||||
MULTILINE \
|
||||
{DEF_MACRO(READ_BIT, TYPE)}, \
|
||||
{DEF_MACRO(READ_NIBBLE, TYPE)}, \
|
||||
{DEF_MACRO(READ_PACKED, TYPE)}, \
|
||||
{DEF_MACRO(READ_PLANAR, TYPE)}, \
|
||||
{DEF_MACRO(READ_PLANAR_FH, TYPE)}, \
|
||||
{DEF_MACRO(READ_PLANAR_FV, TYPE)}, \
|
||||
{DEF_MACRO(READ_PLANAR_FV_FMA, TYPE)}, \
|
||||
{DEF_MACRO(WRITE_BIT, TYPE)}, \
|
||||
{DEF_MACRO(WRITE_NIBBLE, TYPE)}, \
|
||||
{DEF_MACRO(WRITE_PACKED, TYPE)}, \
|
||||
{DEF_MACRO(WRITE_PLANAR, TYPE)}, \
|
||||
{DEF_MACRO(PERMUTE, TYPE)}, \
|
||||
{DEF_MACRO(COPY, TYPE)}, \
|
||||
{DEF_MACRO(SWAP_BYTES, TYPE)}, \
|
||||
{DEF_MACRO(EXPAND_BIT, TYPE)}, \
|
||||
{DEF_MACRO(EXPAND_PAIR, TYPE)}, \
|
||||
{DEF_MACRO(EXPAND_QUAD, TYPE)}, \
|
||||
{DEF_MACRO(TO_U8, TYPE)}, \
|
||||
{DEF_MACRO(TO_U16, TYPE)}, \
|
||||
{DEF_MACRO(TO_U32, TYPE)}, \
|
||||
{DEF_MACRO(TO_F32, TYPE)}, \
|
||||
{DEF_MACRO(SCALE, TYPE)}, \
|
||||
{DEF_MACRO(LINEAR_FMA, TYPE)}, \
|
||||
{DEF_MACRO(ADD, TYPE)}, \
|
||||
{DEF_MACRO(MIN, TYPE)}, \
|
||||
{DEF_MACRO(MAX, TYPE)}, \
|
||||
{DEF_MACRO(UNPACK, TYPE)}, \
|
||||
{DEF_MACRO(PACK, TYPE)}, \
|
||||
{DEF_MACRO(LSHIFT, TYPE)}, \
|
||||
{DEF_MACRO(RSHIFT, TYPE)}, \
|
||||
{DEF_MACRO(CLEAR, TYPE)}, \
|
||||
{DEF_MACRO(DITHER, TYPE)}
|
||||
|
||||
/* Emit the full set of DECL_<TYPE>_<UOP> macros for each TYPE: U8, U16, U32, F32. */
DEF_ALL_MACROS(U8)
|
||||
DEF_ALL_MACROS(U16)
|
||||
DEF_ALL_MACROS(U32)
|
||||
DEF_ALL_MACROS(F32)
|
||||
@@ -28,6 +28,7 @@ compat/float/float.h
|
||||
compat/float/limits.h
|
||||
compat/stdbit/stdbit.h
|
||||
libavcodec/bitstream_template.h
|
||||
libswscale/x86/uops_macros.asm.h
|
||||
tools/decode_simple.h
|
||||
Use of av_clip() where av_clip_uintp2() could be used:
|
||||
Use of av_clip() where av_clip_intp2() could be used:
|
||||
|
||||
Reference in New Issue
Block a user