mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-11 08:13:06 +00:00
swscale/x86/uops: generate NASM macros using uops_macros.h
Rather than hard-coding a separate set of NASM macros, or generating them with a separate function, we can just leverage the C preprocessor to generate a NASM source file *from* the existing ops macros. This is maybe a bit unorthodox, but it avoids unnecessary overhead from generating the macros twice, avoids manual updating of the NASM macros, and generally does not come with any real downside except being a bit ugly. The main source of ugliness is the fact that the C preprocessor expands everything into a single line, whereas NASM expects separate statements to be on separate lines. Very fortunately, we can work around this by writing another NASM macro to take its arguments and dump them onto multiple lines. It may seem premature, but I went ahead and defined all the macros, since it was easy enough to do. I added the %include in this commit so that any build errors caused by introducing this file show up in the same commit that introduces it. Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
@@ -26,6 +26,7 @@
|
|||||||
*.spv
|
*.spv
|
||||||
*.spv.c
|
*.spv.c
|
||||||
*.spv.gz
|
*.spv.gz
|
||||||
|
*.gen.asm
|
||||||
*.gen.c
|
*.gen.c
|
||||||
*.gen.S
|
*.gen.S
|
||||||
*.ptx
|
*.ptx
|
||||||
|
|||||||
+1
-1
@@ -254,7 +254,7 @@ $(TOOLOBJS): | tools
|
|||||||
|
|
||||||
OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS))
|
OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS))
|
||||||
|
|
||||||
CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.gen.c *.gen.S *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *~ *.ilk *.pdb
|
CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.gen.asm *.gen.c *.gen.S *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *~ *.ilk *.pdb
|
||||||
LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
|
LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
|
||||||
|
|
||||||
define RULES
|
define RULES
|
||||||
|
|||||||
@@ -15,9 +15,18 @@ X86ASM-OBJS += x86/input.o \
|
|||||||
x86/yuv_2_rgb.o \
|
x86/yuv_2_rgb.o \
|
||||||
x86/yuv2yuvX.o \
|
x86/yuv2yuvX.o \
|
||||||
|
|
||||||
|
SKIPHEADERS += x86/uops_macros.asm.h
|
||||||
|
|
||||||
ifdef ARCH_X86_64
|
ifdef ARCH_X86_64
|
||||||
X86ASM-OBJS-$(CONFIG_UNSTABLE) += x86/ops_common.o \
|
X86ASM-OBJS-$(CONFIG_UNSTABLE) += x86/ops_common.o \
|
||||||
x86/ops_int.o \
|
x86/ops_int.o \
|
||||||
x86/ops_float.o \
|
x86/ops_float.o \
|
||||||
x86/ops.o
|
x86/ops.o
|
||||||
|
|
||||||
|
$(SUBDIR)x86/ops_common.o: $(SUBDIR)x86/uops_macros.gen.asm
|
||||||
|
$(SUBDIR)x86/ops_int.o: $(SUBDIR)x86/uops_macros.gen.asm
|
||||||
|
$(SUBDIR)x86/ops_float.o: $(SUBDIR)x86/uops_macros.gen.asm
|
||||||
|
$(SUBDIR)x86/uops_macros.gen.asm: $(SRC_PATH)/libswscale/x86/uops_macros.asm.h \
|
||||||
|
$(SRC_PATH)/libswscale/uops_macros.h
|
||||||
|
$(HOSTCC) $(CC_E) $(CPPFLAGS) $<
|
||||||
endif
|
endif
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
;******************************************************************************
|
;******************************************************************************
|
||||||
|
|
||||||
%include "libavutil/x86/x86util.asm"
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
%include "libswscale/x86/uops_macros.gen.asm"
|
||||||
|
|
||||||
; High-level explanation of how the x86 backend works:
|
; High-level explanation of how the x86 backend works:
|
||||||
;
|
;
|
||||||
|
|||||||
@@ -0,0 +1,89 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (C) 2025 Niklas Haas
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* WARNING: This file is preprocessed by the C compiler to generate the NASM
|
||||||
|
* source file uops_macros.gen.asm. It must *NOT* include any headers that
|
||||||
|
* may inadvertently declare any C syntax. The following header only declares
|
||||||
|
* further macros, and is therefore safe.
|
||||||
|
*/
|
||||||
|
#include "../uops_macros.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NASM expects one statement per source line, but the C preprocessor expands
|
||||||
|
* multi-line macros into a single line. This NASM macro allows us to work
|
||||||
|
* around that by expanding its arguments into multiple lines.
|
||||||
|
*/
|
||||||
|
/* Accepts any number of arguments (0-*) and emits each one as its own line. */
%macro MULTILINE 0-*
|
||||||
|
/* Repeat once per argument; %0 is the number of arguments received. */
%rep %0
|
||||||
|
/* Emit the current first argument as a statement of its own. */
%1
|
||||||
|
/* Rotate the argument list left by one so the next argument becomes %1. */
%rotate 1
|
||||||
|
%endrep
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
/* Used to suppress "trailing empty parameter" warnings */
|
||||||
|
/* Zero-argument NASM macro with an empty body: invoking it emits nothing. */
%macro dummy 0
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
/* Callback handed to the SWS_FOR_<TYPE>_<UOP> lists: wraps one entry as a brace-grouped argument of the form {DECL_OP MACRO <args>} followed by a comma. MACRO is not a C macro parameter here, so it is emitted literally; the %define generated by DEF_MACRO declares a parameter of that name. */
#define DECL_OP_MACRO(...) {DECL_OP MACRO __VA_ARGS__},
|
||||||
|
/* Emits a NASM single-line macro DECL_<TYPE>_<UOP>(MACRO) whose body is a MULTILINE invocation over the entries of SWS_FOR_<TYPE>_<UOP> (presumably provided by ../uops_macros.h; confirm there). Every entry produced by DECL_OP_MACRO ends in a comma, so the final `dummy` fills the otherwise empty last argument. */
#define DEF_MACRO(UOP, TYPE) \
|
||||||
|
%define DECL_##TYPE##_##UOP(MACRO) \
|
||||||
|
MULTILINE SWS_FOR_##TYPE##_##UOP(DECL_OP_MACRO) \
|
||||||
|
dummy
|
||||||
|
|
||||||
|
/* Expands to a single MULTILINE invocation whose brace-grouped arguments are the DEF_MACRO expansions for every uop listed below, all for the given TYPE. The C preprocessor collapses this into one line; passing each definition as a separate MULTILINE argument lets NASM emit them on separate lines. */
#define DEF_ALL_MACROS(TYPE) \
|
||||||
|
MULTILINE \
|
||||||
|
{DEF_MACRO(READ_BIT, TYPE)}, \
|
||||||
|
{DEF_MACRO(READ_NIBBLE, TYPE)}, \
|
||||||
|
{DEF_MACRO(READ_PACKED, TYPE)}, \
|
||||||
|
{DEF_MACRO(READ_PLANAR, TYPE)}, \
|
||||||
|
{DEF_MACRO(READ_PLANAR_FH, TYPE)}, \
|
||||||
|
{DEF_MACRO(READ_PLANAR_FV, TYPE)}, \
|
||||||
|
{DEF_MACRO(READ_PLANAR_FV_FMA, TYPE)}, \
|
||||||
|
{DEF_MACRO(WRITE_BIT, TYPE)}, \
|
||||||
|
{DEF_MACRO(WRITE_NIBBLE, TYPE)}, \
|
||||||
|
{DEF_MACRO(WRITE_PACKED, TYPE)}, \
|
||||||
|
{DEF_MACRO(WRITE_PLANAR, TYPE)}, \
|
||||||
|
{DEF_MACRO(PERMUTE, TYPE)}, \
|
||||||
|
{DEF_MACRO(COPY, TYPE)}, \
|
||||||
|
{DEF_MACRO(SWAP_BYTES, TYPE)}, \
|
||||||
|
{DEF_MACRO(EXPAND_BIT, TYPE)}, \
|
||||||
|
{DEF_MACRO(EXPAND_PAIR, TYPE)}, \
|
||||||
|
{DEF_MACRO(EXPAND_QUAD, TYPE)}, \
|
||||||
|
{DEF_MACRO(TO_U8, TYPE)}, \
|
||||||
|
{DEF_MACRO(TO_U16, TYPE)}, \
|
||||||
|
{DEF_MACRO(TO_U32, TYPE)}, \
|
||||||
|
{DEF_MACRO(TO_F32, TYPE)}, \
|
||||||
|
{DEF_MACRO(SCALE, TYPE)}, \
|
||||||
|
{DEF_MACRO(LINEAR_FMA, TYPE)}, \
|
||||||
|
{DEF_MACRO(ADD, TYPE)}, \
|
||||||
|
{DEF_MACRO(MIN, TYPE)}, \
|
||||||
|
{DEF_MACRO(MAX, TYPE)}, \
|
||||||
|
{DEF_MACRO(UNPACK, TYPE)}, \
|
||||||
|
{DEF_MACRO(PACK, TYPE)}, \
|
||||||
|
{DEF_MACRO(LSHIFT, TYPE)}, \
|
||||||
|
{DEF_MACRO(RSHIFT, TYPE)}, \
|
||||||
|
{DEF_MACRO(CLEAR, TYPE)}, \
|
||||||
|
{DEF_MACRO(DITHER, TYPE)}
|
||||||
|
|
||||||
|
/* Generate the complete DECL_<TYPE>_<UOP> set once for each of the four type suffixes used here: U8, U16, U32 and F32. */
DEF_ALL_MACROS(U8)
|
||||||
|
DEF_ALL_MACROS(U16)
|
||||||
|
DEF_ALL_MACROS(U32)
|
||||||
|
DEF_ALL_MACROS(F32)
|
||||||
@@ -28,6 +28,7 @@ compat/float/float.h
|
|||||||
compat/float/limits.h
|
compat/float/limits.h
|
||||||
compat/stdbit/stdbit.h
|
compat/stdbit/stdbit.h
|
||||||
libavcodec/bitstream_template.h
|
libavcodec/bitstream_template.h
|
||||||
|
libswscale/x86/uops_macros.asm.h
|
||||||
tools/decode_simple.h
|
tools/decode_simple.h
|
||||||
Use of av_clip() where av_clip_uintp2() could be used:
|
Use of av_clip() where av_clip_uintp2() could be used:
|
||||||
Use of av_clip() where av_clip_intp2() could be used:
|
Use of av_clip() where av_clip_intp2() could be used:
|
||||||
|
|||||||
Reference in New Issue
Block a user