tests/checkasm: switch to external checkasm

There are a number of benefits tied to the upstream / third-party checkasm
version, including:

- Improved long-term maintainability, code reuse with other projects, etc.

- Vastly improved overall performance / runtime for benchmarking, due
  primarily to the ability to scale the runtime of each test to that test's
  complexity.

- Much more robust statistical analysis of benchmarking results, including
  robust outlier rejection, an estimation of the histogram, and the ability
  to report the variance / stddev in addition to the (trimmed) mean.

- Interactive HTML and JSON output formats in addition to CSV/TSV.

- More readable and user-friendly output across the board, especially for
  failures and data dumps (e.g. also showing errors inside padding bytes).

- Better cross-platform support, including dynamic fallback of timer
  implementations on ARM platforms, a better RISC-V harness, and more.

There are multiple approaches to how we can solve the problem of integrating
this third party checkasm into dav1d, but I think the hybrid approach of
loading it as an external dependency, falling back to a meson wrap file,
provides the best overall compromise. This avoids the messiness of e.g.
git submodules, while still allowing us to pin individual tags.
This commit is contained in:
Niklas Haas
2026-01-01 17:33:55 +01:00
parent 3374404179
commit 3a2a874994
22 changed files with 108 additions and 2909 deletions
-2
View File
@@ -86,8 +86,6 @@ cdata.set10('TRIM_DSP_FUNCTIONS', get_option('trim_dsp') == 'true' or
# Logging option # Logging option
cdata.set10('CONFIG_LOG', get_option('logging')) cdata.set10('CONFIG_LOG', get_option('logging'))
cdata.set10('CONFIG_MACOS_KPERF', get_option('macos_kperf'))
# #
# OS/Compiler checks and defines # OS/Compiler checks and defines
# #
-5
View File
@@ -68,8 +68,3 @@ option('trim_dsp',
choices: ['true', 'false', 'if-release'], choices: ['true', 'false', 'if-release'],
value: 'if-release', value: 'if-release',
description: 'Eliminate redundant DSP functions where possible') description: 'Eliminate redundant DSP functions where possible')
# Opt-in boolean (off by default). Its value is exported to the generated
# config as CONFIG_MACOS_KPERF, which the checkasm header uses to pick the
# kperf-based cycle counter for benchmarking.
option('macos_kperf',
type: 'boolean',
value: false,
description: 'Use the private macOS kperf API for benchmarking')
+4
View File
@@ -0,0 +1,4 @@
# Meson wrap for the external checkasm test harness: fetched from git at the
# pinned tag below and checked out into the "checkasm" subproject directory.
[wrap-git]
url = https://code.videolan.org/videolan/checkasm.git
revision = v1.0.1
directory = checkasm
-201
View File
@@ -1,201 +0,0 @@
/******************************************************************************
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2015 Martin Storsjo
* Copyright © 2015 Janne Grunau
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#define PRIVATE_PREFIX checkasm_
#include "src/arm/asm.S"
#include "src/arm/32/util.S"
@ Arbitrary 64-bit patterns used to seed the callee-saved registers before
@ the call: checked_call loads the first 32 bytes into r4-r11 and, in the
@ vfp variant, all 64 bytes into d8-d15, then compares against this table
@ again after the call.
const register_init, align=3
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
.quad 0x8bda43d3fd1a7e06
.quad 0xb64a9c9e5d318408
.quad 0xdf9a54b303f1d3a3
.quad 0x4a75479abd64e097
.quad 0x249214109d5d1c88
endconst
const error_message_fpscr
.asciz "failed to preserve register FPSCR, changed bits: %x"
error_message_gpr:
.asciz "failed to preserve register r%d"
error_message_vfp:
.asciz "failed to preserve register d%d"
error_message_stack:
.asciz "failed to preserve stack"
endconst
@ max number of args used by any asm function.
#define MAX_ARGS 15
#define ARG_STACK 4*(MAX_ARGS - 4)
@ Align the used stack space to 8 to preserve the stack alignment.
@ +8 for stack canary reference.
#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed + 8)
@ Generates checked_call_<variant> (instantiated below for novfp and vfp).
@ The wrapper seeds r4-r11 (and d8-d15 in the vfp variant) from
@ register_init, calls the function pointer received in r0, and afterwards
@ verifies the stack canary, the seeded registers and (vfp only) FPSCR.
@ A mismatch is reported through fail_func. The value returned by the
@ called function in r0/r1 is handed back to the caller unchanged.
.macro clobbercheck variant
@ Bytes pushed by the prologue: r4-r11 and lr, i.e. 9 words.
.equ pushed, 4*9
function checked_call_\variant, export=1
push {r4-r11, lr}
.ifc \variant, vfp
vpush {d8-d15}
fmrx r4, FPSCR
push {r4}
@ d8-d15 occupy 64 bytes and the saved FPSCR one more word.
.equ pushed, pushed + 16*4 + 4
.endif
@ Seed the callee-saved registers with known patterns.
movrel r12, register_init
.ifc \variant, vfp
vldm r12, {d8-d15}
.endif
ldm r12, {r4-r11}
@ Reserve the outgoing argument area and copy the stack arguments of the
@ caller into it. The first two caller stack words are skipped here since
@ they are loaded into r2/r3 further down.
sub sp, sp, #ARG_STACK_A
.equ pos, 0
.rept MAX_ARGS-4
ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
str r12, [sp, #pos]
.equ pos, pos + 4
.endr
@ For stack overflows, the callee is free to overwrite the parameters
@ that were passed on the stack (if any), so we can only check after
@ that point. First figure out how many parameters the function
@ really took on the stack:
ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
@ Load the first non-parameter value from the stack, that should be
@ left untouched by the function. Store a copy of it inverted, so that
@ e.g. overwriting everything with zero would be noticed.
ldr r12, [sp, r12, lsl #2]
mvn r12, r12
str r12, [sp, #ARG_STACK_A - 4]
@ Shift the arguments into place: r0 holds the function pointer and the
@ incoming r1 is not forwarded, so the real arguments start in r2/r3 and
@ continue on the stack of the caller.
mov r12, r0
mov r0, r2
mov r1, r3
ldr r2, [sp, #ARG_STACK_A + pushed]
ldr r3, [sp, #ARG_STACK_A + pushed + 4]
@ Call the target function
v4blx r12
@ Load the number of stack parameters, stack canary and its reference
ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
ldr r2, [sp, r12, lsl #2]
ldr r3, [sp, #ARG_STACK_A - 4]
add sp, sp, #ARG_STACK_A
@ Keep the return value of the callee safe while the checks run.
push {r0, r1}
@ Undo the inversion of the reference and compare it with the canary.
mvn r3, r3
cmp r2, r3
bne 5f
movrel r12, register_init
.ifc \variant, vfp
@ Compare one d register against its 8-byte entry in register_init.
.macro check_reg_vfp, dreg, offset
ldr r2, [r12, #(8 * (\offset))]
ldr r3, [r12, #(8 * (\offset)) + 4]
vmov r0, lr, \dreg
eor r2, r2, r0
eor r3, r3, lr
orrs r2, r2, r3
bne 4f
.endm
.irp n, 8, 9, 10, 11, 12, 13, 14, 15
@ keep track of the checked double/SIMD register
mov r1, #\n
check_reg_vfp d\n, \n-8
.endr
.purgem check_reg_vfp
fmrx r1, FPSCR
@ The FPSCR value saved by the prologue sits right above the r0/r1 pair
@ pushed after the call.
ldr r3, [sp, #8]
eor r1, r1, r3
@ Ignore changes in bits 0-4 and 7
bic r1, r1, #0x9f
@ Ignore changes in the topmost 5 bits
bics r1, r1, #0xf8000000
bne 3f
.endif
@ keep track of the checked GPR
mov r1, #4
@ Compare one or two GPRs against consecutive words of register_init.
@ r12 always advances by two words and r1 by two register numbers, also
@ when the second register is omitted.
.macro check_reg reg1, reg2=
ldr r2, [r12], #4
ldr r3, [r12], #4
eors r2, r2, \reg1
bne 2f
add r1, r1, #1
.ifnb \reg2
eors r3, r3, \reg2
bne 2f
.endif
add r1, r1, #1
.endm
check_reg r4, r5
check_reg r6, r7
@ r9 is a volatile register in the ios ABI
#ifdef __APPLE__
check_reg r8
#else
check_reg r8, r9
#endif
check_reg r10, r11
.purgem check_reg
b 0f
@ Error exits: pick the message, then report it through fail_func. For the
@ register messages r1 carries the format argument (register number, or
@ the changed FPSCR bits).
5:
movrel r0, error_message_stack
b 1f
4:
movrel r0, error_message_vfp
b 1f
3:
movrel r0, error_message_fpscr
b 1f
2:
movrel r0, error_message_gpr
1:
bl X(fail_func)
0:
@ Restore the return value of the callee and the state saved on entry.
pop {r0, r1}
.ifc \variant, vfp
pop {r2}
fmxr FPSCR, r2
vpop {d8-d15}
.endif
pop {r4-r11, pc}
endfunc
.endm
clobbercheck novfp
clobbercheck vfp
-217
View File
@@ -1,217 +0,0 @@
/******************************************************************************
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2015 Martin Storsjo
* Copyright © 2015 Janne Grunau
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#define PRIVATE_PREFIX checkasm_
#include "src/arm/asm.S"
#include "src/arm/64/util.S"
const register_init, align=4
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
.quad 0x8bda43d3fd1a7e06
.quad 0xb64a9c9e5d318408
.quad 0xdf9a54b303f1d3a3
.quad 0x4a75479abd64e097
.quad 0x249214109d5d1c88
.quad 0x1a1b2550a612b48c
.quad 0x79445c159ce79064
.quad 0x2eed899d5a28ddcd
.quad 0x86b2536fcd8cf636
.quad 0xb0856806085e7943
.quad 0x3f2bf84fc0fcca4e
.quad 0xacbd382dcf5b8de2
.quad 0xd229e1f5b281303f
.quad 0x71aeaff20b095fd9
.quad 0xab63e2e11fa38ed9
endconst
const error_message_register
.asciz "failed to preserve register"
error_message_stack:
.asciz "stack clobbered"
endconst
// max number of args used by any asm function.
#define MAX_ARGS 15
#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)
// Overwrites the stack area directly below the current stack pointer with
// copies of x0/x1 (CLOBBER_STACK bytes in total, 16 bytes per iteration),
// then restores sp so the caller sees an unchanged stack pointer.
function stack_clobber, export=1
// Remember the original stack pointer; x2 counts the bytes left to fill.
mov x3, sp
mov x2, #CLOBBER_STACK
1:
stp x0, x1, [sp, #-16]!
subs x2, x2, #16
b.gt 1b
mov sp, x3
ret
endfunc
// Size of the outgoing stack argument area: room for the arguments that do
// not fit in registers, rounded up to a multiple of 16 bytes,
// + 16 for stack canary reference.
// The rounding is parenthesized explicitly. GNU as gives '&' a higher
// precedence than '+', but under C-style precedence the unparenthesized
// form "x & ~15 + 16" would be read as "x & (~15 + 16)", which is zero.
#define ARG_STACK (((8*(MAX_ARGS - 8) + 15) & ~15) + 16)
// Calls the function pointer passed in x0 with the callee-saved registers
// x19-x28 and d8-d15 preloaded from register_init, then verifies that the
// callee preserved them and did not write past its stack arguments.
// The arguments forwarded to the callee are read from the stack of the
// caller (starting at x29 + 16): the first eight go into x0-x7, the rest
// are copied to a fresh argument area. A violation is reported through
// fail_func; the value the callee returned in x0/x1 is passed back.
function checked_call, export=1
// Frame record first, then save every callee-saved register we seed.
stp x29, x30, [sp, #-16]!
mov x29, sp
stp x19, x20, [sp, #-16]!
stp x21, x22, [sp, #-16]!
stp x23, x24, [sp, #-16]!
stp x25, x26, [sp, #-16]!
stp x27, x28, [sp, #-16]!
stp d8, d9, [sp, #-16]!
stp d10, d11, [sp, #-16]!
stp d12, d13, [sp, #-16]!
stp d14, d15, [sp, #-16]!
// Seed d8-d15 and x19-x28 with the known patterns, in table order.
movrel x9, register_init
ldp d8, d9, [x9], #16
ldp d10, d11, [x9], #16
ldp d12, d13, [x9], #16
ldp d14, d15, [x9], #16
ldp x19, x20, [x9], #16
ldp x21, x22, [x9], #16
ldp x23, x24, [x9], #16
ldp x25, x26, [x9], #16
ldp x27, x28, [x9], #16
sub sp, sp, #ARG_STACK
.equ pos, 0
.rept MAX_ARGS-8
// Skip the first 8 args, that are loaded into registers
ldr x9, [x29, #16 + 8*8 + pos]
str x9, [sp, #pos]
.equ pos, pos + 8
.endr
// Fill x8-x17 with garbage. This doesn't have to be preserved,
// but avoids relying on them having any particular value.
movrel x9, register_init
ldp x10, x11, [x9], #32
ldp x12, x13, [x9], #32
ldp x14, x15, [x9], #32
ldp x16, x17, [x9], #32
ldp x8, x9, [x9]
// For stack overflows, the callee is free to overwrite the parameters
// that were passed on the stack (if any), so we can only check after
// that point. First figure out how many parameters the function
// really took on the stack:
ldr w2, [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
// Load the first non-parameter value from the stack, that should be
// left untouched by the function. Store a copy of it inverted, so that
// e.g. overwriting everything with zero would be noticed.
ldr x2, [sp, x2, lsl #3]
mvn x2, x2
str x2, [sp, #ARG_STACK-8]
// Load the in-register arguments
mov x12, x0
ldp x0, x1, [x29, #16]
ldp x2, x3, [x29, #32]
ldp x4, x5, [x29, #48]
ldp x6, x7, [x29, #64]
// Call the target function
blr x12
// Load the number of stack parameters, stack canary and its reference
ldr w2, [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
ldr x2, [sp, x2, lsl #3]
ldr x3, [sp, #ARG_STACK-8]
add sp, sp, #ARG_STACK
// Keep the callee's return value safe while the checks run.
stp x0, x1, [sp, #-16]!
// Undo the inversion of the reference and compare it with the canary.
mvn x3, x3
cmp x2, x3
b.ne 2f
movrel x9, register_init
movi v3.8h, #0
// Pack the low 64 bits of two vector registers into one 128-bit value,
// XOR it with the expected table entry and OR the difference into v3.
.macro check_reg_neon reg1, reg2
ldr q1, [x9], #16
uzp1 v2.2d, v\reg1\().2d, v\reg2\().2d
eor v1.16b, v1.16b, v2.16b
orr v3.16b, v3.16b, v1.16b
.endm
check_reg_neon 8, 9
check_reg_neon 10, 11
check_reg_neon 12, 13
check_reg_neon 14, 15
// Saturating narrow: any non-zero halfword of v3 stays non-zero, so the
// whole difference fits in the 64 bits moved to x3.
uqxtn v3.8b, v3.8h
umov x3, v3.d[0]
// XOR two GPRs with their expected values and OR the differences into x3.
.macro check_reg reg1, reg2
ldp x0, x1, [x9], #16
eor x0, x0, \reg1
eor x1, x1, \reg2
orr x3, x3, x0
orr x3, x3, x1
.endm
check_reg x19, x20
check_reg x21, x22
check_reg x23, x24
check_reg x25, x26
check_reg x27, x28
// x3 is zero only if every checked register still holds its seed value.
cbz x3, 0f
movrel x0, error_message_register
b 1f
2:
movrel x0, error_message_stack
1:
bl X(fail_func)
0:
// Restore the callee's return value and everything saved on entry.
ldp x0, x1, [sp], #16
ldp d14, d15, [sp], #16
ldp d12, d13, [sp], #16
ldp d10, d11, [sp], #16
ldp d8, d9, [sp], #16
ldp x27, x28, [sp], #16
ldp x25, x26, [sp], #16
ldp x23, x24, [sp], #16
ldp x21, x22, [sp], #16
ldp x19, x20, [sp], #16
ldp x29, x30, [sp], #16
ret
endfunc
#if HAVE_SVE
ENABLE_SVE
// Returns the SVE vector length in bits: cntb yields the vector length in
// bytes, which is then multiplied by 8.
function sve_length, export=1
cntb x0
lsl x0, x0, #3
ret
endfunc
DISABLE_SVE
#endif
+1 -1
View File
@@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
+16 -1049
View File
File diff suppressed because it is too large Load Diff
-458
View File
@@ -1,458 +0,0 @@
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DAV1D_TESTS_CHECKASM_CHECKASM_H
#define DAV1D_TESTS_CHECKASM_CHECKASM_H
#include "config.h"
#include <stdint.h>
#include <stdlib.h>
#ifdef _WIN32
#include <windows.h>
#if ARCH_X86_32
#include <setjmp.h>
typedef jmp_buf checkasm_context;
#define checkasm_save_context() setjmp(checkasm_context_buf)
#define checkasm_load_context() longjmp(checkasm_context_buf, 1)
#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
/* setjmp/longjmp on Windows on architectures using SEH (all except x86_32)
* will try to use SEH to unwind the stack, which doesn't work for assembly
* functions without unwind information. */
typedef struct { CONTEXT c; int status; } checkasm_context;
#define checkasm_save_context() \
(checkasm_context_buf.status = 0, \
RtlCaptureContext(&checkasm_context_buf.c), \
checkasm_context_buf.status)
#define checkasm_load_context() \
(checkasm_context_buf.status = 1, \
RtlRestoreContext(&checkasm_context_buf.c, NULL))
#else
typedef void* checkasm_context;
#define checkasm_save_context() 0
#define checkasm_load_context() do {} while (0)
#endif
#else
#include <setjmp.h>
typedef sigjmp_buf checkasm_context;
#define checkasm_save_context() sigsetjmp(checkasm_context_buf, 1)
#define checkasm_load_context() siglongjmp(checkasm_context_buf, 1)
#endif
#include "include/common/attributes.h"
#include "include/common/bitdepth.h"
#include "include/common/intops.h"
#if ARCH_ARM
#include "src/arm/arm-arch.h"
#endif
int xor128_rand(void);
#define rnd xor128_rand
#define decl_check_bitfns(name) \
name##_8bpc(void); \
name##_16bpc(void)
void checkasm_check_msac(void);
void checkasm_check_pal(void);
void checkasm_check_refmvs(void);
decl_check_bitfns(void checkasm_check_cdef);
decl_check_bitfns(void checkasm_check_filmgrain);
decl_check_bitfns(void checkasm_check_ipred);
decl_check_bitfns(void checkasm_check_itx);
decl_check_bitfns(void checkasm_check_loopfilter);
decl_check_bitfns(void checkasm_check_looprestoration);
decl_check_bitfns(void checkasm_check_mc);
void *checkasm_check_func(void *func, const char *name, ...);
int checkasm_bench_func(void);
int checkasm_fail_func(const char *msg, ...);
void checkasm_update_bench(int iterations, uint64_t cycles);
void checkasm_report(const char *name, ...);
void checkasm_set_signal_handler_state(int enabled);
void checkasm_handle_signal(void);
extern checkasm_context checkasm_context_buf;
/* float compare utilities */
int float_near_ulp(float a, float b, unsigned max_ulp);
int float_near_abs_eps(float a, float b, float eps);
int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
int len);
int float_near_abs_eps_array(const float *a, const float *b, float eps,
int len);
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
unsigned max_ulp, int len);
#define BENCH_RUNS (1 << 12) /* Trade-off between accuracy and speed */
/* Decide whether or not the specified function needs to be tested */
#define check_func(func, ...)\
(func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
/* Declare the function prototype. The first argument is the return value,
* the remaining arguments are the function parameters. Naming parameters
* is optional. */
#define declare_func(ret, ...)\
declare_new(ret, __VA_ARGS__)\
void *func_ref, *func_new;\
typedef ret func_type(__VA_ARGS__);\
if (checkasm_save_context()) checkasm_handle_signal()
/* Indicate that the current test has failed */
#define fail() checkasm_fail_func("%s:%d", __FILE__, __LINE__)
/* Print the test outcome */
#define report checkasm_report
/* Call the reference function with the signal handler state set to 1 for
 * the duration of the call and reset to 0 afterwards.
 * The macro expands to an expression followed by a second statement: the
 * value of the call is the value of the first expression, so
 * `x = call_ref(...);` assigns the result. Because of the second statement
 * it must not be used as the sole body of an unbraced if/else/loop. */
#define call_ref(...)\
(checkasm_set_signal_handler_state(1),\
((func_type *)func_ref)(__VA_ARGS__));\
checkasm_set_signal_handler_state(0)
#if HAVE_ASM
#if ARCH_X86
#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
#define readtime() (_mm_lfence(), __rdtsc())
#else
/* Read the x86 time-stamp counter. The lfence ahead of rdtsc keeps the
 * counter read from being issued before earlier instructions complete. */
static inline uint64_t readtime(void) {
    uint32_t tsc_lo, tsc_hi;
    __asm__ __volatile__("lfence\nrdtsc" : "=a"(tsc_lo), "=d"(tsc_hi));
    /* rdtsc returns the counter split across EDX:EAX. */
    const uint64_t upper = ((uint64_t)tsc_hi) << 32;
    return upper | tsc_lo;
}
#define readtime readtime
#endif
#elif CONFIG_MACOS_KPERF
uint64_t checkasm_kperf_cycles(void);
#define readtime() checkasm_kperf_cycles()
#elif (ARCH_AARCH64 || ARCH_ARM) && defined(__APPLE__)
#include <mach/mach_time.h>
#define readtime() mach_absolute_time()
#elif ARCH_AARCH64
#ifdef _MSC_VER
#include <windows.h>
#define readtime() (_InstructionSynchronizationBarrier(), ReadTimeStampCounter())
#else
static inline uint64_t readtime(void) {
uint64_t cycle_counter;
/* This requires enabling user mode access to the cycle counter (which
* can only be done from kernel space).
* This could also read cntvct_el0 instead of pmccntr_el0; that register
* might also be readable (depending on kernel version), but it has much
* worse precision (it's a fixed 50 MHz timer). */
__asm__ __volatile__("isb\nmrs %0, pmccntr_el0"
: "=r"(cycle_counter)
:: "memory");
return cycle_counter;
}
#define readtime readtime
#endif
#elif ARCH_ARM && !defined(_MSC_VER) && __ARM_ARCH >= 7
static inline uint64_t readtime(void) {
uint32_t cycle_counter;
/* This requires enabling user mode access to the cycle counter (which
* can only be done from kernel space). */
__asm__ __volatile__("isb\nmrc p15, 0, %0, c9, c13, 0"
: "=r"(cycle_counter)
:: "memory");
return cycle_counter;
}
#define readtime readtime
#elif ARCH_PPC64LE
/* Read the 64-bit time base as two 32-bit halves (SPR 269 = upper half,
 * SPR 268 = lower half). The upper half is read before and after the lower
 * half; if it changed in between, the lower half wrapped and the sequence
 * is retried so that both halves belong to the same counter value. */
static inline uint64_t readtime(void) {
uint32_t tbu, tbl, temp;
__asm__ __volatile__(
"1:\n"
"mfspr %2,269\n"
"mfspr %0,268\n"
"mfspr %1,269\n"
"cmpw %2,%1\n"
"bne 1b\n"
: "=r"(tbl), "=r"(tbu), "=r"(temp)
:
: "cc");
return (((uint64_t)tbu) << 32) | (uint64_t)tbl;
}
#define readtime readtime
#elif ARCH_RISCV
#include <time.h>
static inline uint64_t clock_gettime_nsec(void) {
struct timespec ts;
#ifdef CLOCK_MONOTONIC_RAW
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
#else
clock_gettime(CLOCK_MONOTONIC, &ts);
#endif
return ((uint64_t)ts.tv_sec*1000000000u) + (uint64_t)ts.tv_nsec;
}
#define readtime clock_gettime_nsec
#elif ARCH_LOONGARCH
/* Read the LoongArch timer via the rdtime instructions. The instruction
 * writes two registers; only the first (the counter value, `a`) is
 * returned, the second (`id`) is discarded. The 64-bit build reads the
 * full counter with rdtime.d, the 32-bit build reads a 32-bit value with
 * rdtimel.w and widens it. */
static inline uint64_t readtime(void) {
#if ARCH_LOONGARCH64
uint64_t a, id;
__asm__ __volatile__("rdtime.d %0, %1"
: "=r"(a), "=r"(id)
:: );
return a;
#else
uint32_t a, id;
__asm__ __volatile__("rdtimel.w %0, %1"
: "=r"(a), "=r"(id)
:: );
return (uint64_t)a;
#endif
}
#define readtime readtime
#endif
/* Verifies that clobbered callee-saved registers
* are properly saved and restored */
void checkasm_checked_call(void *func, ...);
#if ARCH_X86_64
/* YMM and ZMM registers on x86 are turned off to save power when they haven't
* been used for some period of time. When they are used there will be a
* "warmup" period during which performance will be reduced and inconsistent
* which is problematic when trying to benchmark individual functions. We can
* work around this by periodically issuing "dummy" instructions that uses
* those registers to keep them powered on. */
void checkasm_simd_warmup(void);
/* The upper 32 bits of 32-bit data types are undefined when passed as function
* parameters. In practice those bits usually end up being zero which may hide
* certain bugs, such as using a register containing undefined bits as a pointer
* offset, so we want to intentionally clobber those bits with junk to expose
* any issues. The following set of macros automatically calculates a bitmask
* specifying which parameters should have their upper halves clobbered. */
#ifdef _WIN32
/* Integer and floating-point parameters share "register slots". */
#define IGNORED_FP_ARGS 0
#else
/* Up to 8 floating-point parameters are passed in XMM registers, which are
* handled orthogonally from integer parameters passed in GPR registers. */
#define IGNORED_FP_ARGS 8
#endif
#if HAVE_C11_GENERIC
#define clobber_type(arg) _Generic((void (*)(void*, arg))NULL,\
void (*)(void*, int32_t ): clobber_mask |= 1 << mpos++,\
void (*)(void*, uint32_t): clobber_mask |= 1 << mpos++,\
void (*)(void*, float ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\
void (*)(void*, double ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\
default: mpos++)
#define init_clobber_mask(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, ...)\
unsigned clobber_mask = 0;\
{\
int mpos = 0, fp_args = 0;\
clobber_type(a); clobber_type(b); clobber_type(c); clobber_type(d);\
clobber_type(e); clobber_type(f); clobber_type(g); clobber_type(h);\
clobber_type(i); clobber_type(j); clobber_type(k); clobber_type(l);\
clobber_type(m); clobber_type(n); clobber_type(o); clobber_type(p);\
}
#else
/* Skip parameter clobbering on compilers without support for _Generic() */
#define init_clobber_mask(...) unsigned clobber_mask = 0
#endif
#define declare_new(ret, ...)\
ret (*checked_call)(__VA_ARGS__, int, int, int, int, int, int, int,\
int, int, int, int, int, int, int, int, int,\
void*, unsigned) =\
(void*)checkasm_checked_call;\
init_clobber_mask(__VA_ARGS__, void*, void*, void*, void*,\
void*, void*, void*, void*, void*, void*,\
void*, void*, void*, void*, void*);
#define call_new(...)\
(checkasm_set_signal_handler_state(1),\
checkasm_simd_warmup(),\
checked_call(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8,\
7, 6, 5, 4, 3, 2, 1, func_new, clobber_mask));\
checkasm_set_signal_handler_state(0)
#elif ARCH_X86_32
#define declare_new(ret, ...)\
ret (*checked_call)(void *, __VA_ARGS__, int, int, int, int, int, int,\
int, int, int, int, int, int, int, int, int) =\
(void *)checkasm_checked_call;
#define call_new(...)\
(checkasm_set_signal_handler_state(1),\
checked_call(func_new, __VA_ARGS__, 15, 14, 13, 12,\
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1));\
checkasm_set_signal_handler_state(0)
#elif ARCH_ARM
/* Use a dummy argument, to offset the real parameters by 2, not only 1.
* This makes sure that potential 8-byte-alignment of parameters is kept
* the same even when the extra parameters have been removed. */
extern void (*checkasm_checked_call_ptr)(void *func, int dummy, ...);
#define declare_new(ret, ...)\
ret (*checked_call)(void *, int dummy, __VA_ARGS__,\
int, int, int, int, int, int, int, int,\
int, int, int, int, int, int, int) =\
(void *)checkasm_checked_call_ptr;
#define call_new(...)\
(checkasm_set_signal_handler_state(1),\
checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0));\
checkasm_set_signal_handler_state(0)
#elif ARCH_AARCH64 && !defined(__APPLE__)
void checkasm_stack_clobber(uint64_t clobber, ...);
#define declare_new(ret, ...)\
ret (*checked_call)(void *, int, int, int, int, int, int, int,\
__VA_ARGS__, int, int, int, int, int, int, int, int,\
int, int, int, int, int, int, int) =\
(void *)checkasm_checked_call;
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
#define call_new(...)\
(checkasm_set_signal_handler_state(1),\
checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
CLOB, CLOB, CLOB, CLOB, CLOB),\
checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\
checkasm_set_signal_handler_state(0)
#elif ARCH_RISCV
#define declare_new(ret, ...)\
ret (*checked_call)(void *, int, int, int, int, int, int, int,\
__VA_ARGS__, int, int, int, int, int, int, int, int,\
int, int, int, int, int, int, int) =\
(void *)checkasm_checked_call;
#define call_new(...)\
(checkasm_set_signal_handler_state(1),\
checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\
checkasm_set_signal_handler_state(0)
#elif ARCH_LOONGARCH
#define declare_new(ret, ...)\
ret (*checked_call)(void *, int, int, int, int, int, int, int,\
__VA_ARGS__, int, int, int, int, int, int, int, int,\
int, int, int, int, int, int, int) =\
(void *)checkasm_checked_call;
#define call_new(...)\
(checkasm_set_signal_handler_state(1),\
checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\
checkasm_set_signal_handler_state(0)
#else
#define declare_new(ret, ...)
#define call_new(...)\
(checkasm_set_signal_handler_state(1),\
((func_type *)func_new)(__VA_ARGS__));\
checkasm_set_signal_handler_state(0)
#endif
#else /* HAVE_ASM */
#define declare_new(ret, ...)
/* Call the function */
#define call_new(...)\
(checkasm_set_signal_handler_state(1),\
((func_type *)func_new)(__VA_ARGS__));\
checkasm_set_signal_handler_state(0)
#endif /* HAVE_ASM */
/* Benchmark the function */
#ifdef readtime
/* When checkasm_bench_func() returns non-zero, func_new is called directly
 * (not through checked_call) and timed: BENCH_RUNS samples are taken, each
 * covering four back-to-back calls. `talt` flips between 0 and 1 from call
 * to call so that alternate() can switch buffers.
 * A sample is only accumulated if it is not the first iteration (ti > 0)
 * and t*tcount <= tsum*4, i.e. it is at most four times the running
 * average of the samples accepted so far; slower samples are dropped as
 * outliers. The accepted count and sum go to checkasm_update_bench().
 * When benchmarking is off, the function is run once through call_new(). */
#define bench_new(...)\
do {\
if (checkasm_bench_func()) {\
func_type *const tfunc = func_new;\
checkasm_set_signal_handler_state(1);\
uint64_t tsum = 0;\
int tcount = 0;\
for (int ti = 0; ti < BENCH_RUNS; ti++) {\
uint64_t t = readtime();\
int talt = 0; (void)talt;\
tfunc(__VA_ARGS__);\
talt = 1;\
tfunc(__VA_ARGS__);\
talt = 0;\
tfunc(__VA_ARGS__);\
talt = 1;\
tfunc(__VA_ARGS__);\
t = readtime() - t;\
if (t*tcount <= tsum*4 && ti > 0) {\
tsum += t;\
tcount++;\
}\
}\
checkasm_set_signal_handler_state(0);\
checkasm_update_bench(tcount, tsum);\
} else {\
const int talt = 0; (void)talt;\
call_new(__VA_ARGS__);\
}\
} while (0)
#else
#define bench_new(...) do {} while (0)
#endif
/* Alternates between two pointers. Intended to be used within bench_new()
* calls for functions which modifies their input buffer(s) to ensure that
* throughput, and not latency, is measured. */
#define alternate(a, b) (talt ? (b) : (a))
/* Round x up to the next multiple of a. The bit-mask form only gives a
 * correct result when a is a power of two. Both arguments are evaluated
 * more than once. */
#define ROUND_UP(x,a) (((x)+((a)-1)) & ~((a)-1))
/* Declare a padded w x h pixel rectangle called `name`:
 *  - name##_buf:    backing array of (h+32) rows, each ROUND_UP(w,64)+64
 *                   pixels wide, plus 64 extra pixels (declared through
 *                   ALIGN_STK_64, defined elsewhere; presumably a 64-byte
 *                   aligned stack array, to be confirmed there);
 *  - name##_stride: row stride in bytes;
 *  - name##_buf_h:  number of rows in the backing array (h+32);
 *  - name:          pointer to the rectangle itself, placed 16 rows and 64
 *                   pixels into the backing array, which leaves 16 rows of
 *                   padding above and below it.
 * The (void) casts silence unused-variable warnings. */
#define PIXEL_RECT(name, w, h) \
ALIGN_STK_64(pixel, name##_buf, ((h)+32)*(ROUND_UP(w,64)+64) + 64,); \
ptrdiff_t name##_stride = sizeof(pixel)*(ROUND_UP(w,64)+64); \
(void)name##_stride; \
int name##_buf_h = (h)+32; \
(void)name##_buf_h;\
pixel *name = name##_buf + (ROUND_UP(w,64)+64)*16 + 64
/* Fill the entire backing array of a rectangle declared with PIXEL_RECT
 * (name##_buf, padding included) with the byte value 0x99.
 * The definition must not end in a line continuation: a trailing backslash
 * would splice the following source line into the macro body. */
#define CLEAR_PIXEL_RECT(name) \
    memset(name##_buf, 0x99, sizeof(name##_buf))
#define DECL_CHECKASM_CHECK_FUNC(type) \
int checkasm_check_##type(const char *const file, const int line, \
const type *const buf1, const ptrdiff_t stride1, \
const type *const buf2, const ptrdiff_t stride2, \
const int w, const int h, const char *const name, \
const int align_w, const int align_h, \
const int padding)
DECL_CHECKASM_CHECK_FUNC(int8_t);
DECL_CHECKASM_CHECK_FUNC(int16_t);
DECL_CHECKASM_CHECK_FUNC(int32_t);
DECL_CHECKASM_CHECK_FUNC(uint8_t);
DECL_CHECKASM_CHECK_FUNC(uint16_t);
DECL_CHECKASM_CHECK_FUNC(uint32_t);
#define CONCAT(a,b) a ## b
#define checkasm_check2(prefix, ...) CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__)
#define checkasm_check(prefix, ...) checkasm_check2(prefix, __VA_ARGS__, 0, 0, 0)
#ifdef BITDEPTH
#define checkasm_check_pixel(...) checkasm_check(PIXEL_TYPE, __VA_ARGS__)
#define checkasm_check_pixel_padded(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 1, 1, 8)
#define checkasm_check_pixel_padded_align(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 8)
#define checkasm_check_coef(...) checkasm_check(COEF_TYPE, __VA_ARGS__)
#endif
#endif /* DAV1D_TESTS_CHECKASM_CHECKASM_H */
+1 -1
View File
@@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include <string.h> #include <string.h>
+66
View File
@@ -0,0 +1,66 @@
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DAV1D_TESTS_CHECKASM_INTERNAL_H
#define DAV1D_TESTS_CHECKASM_INTERNAL_H
#include "config.h"
#include "common/intops.h"
#include <checkasm/test.h>
#include <checkasm/utils.h>
/* Short alias used by the tests: rnd() expands to checkasm_rand(). */
#define rnd checkasm_rand
/*
 * Declare the 8 bpc and the 16 bpc variant of a test entry point.
 * `name` must carry the return type as well, e.g.
 *     decl_check_bitfns(void checkasm_check_mc);
 * declares checkasm_check_mc_8bpc(void) and checkasm_check_mc_16bpc(void).
 * The terminating ';' of the second declaration is supplied at the use site.
 */
#define decl_check_bitfns(name) \
name##_8bpc(void); \
name##_16bpc(void)
/* Test entry points that exist only once (not templated on bitdepth). */
void checkasm_check_msac(void);
void checkasm_check_pal(void);
void checkasm_check_refmvs(void);
/* Test entry points that exist once per bitdepth (_8bpc and _16bpc). */
decl_check_bitfns(void checkasm_check_cdef);
decl_check_bitfns(void checkasm_check_filmgrain);
decl_check_bitfns(void checkasm_check_ipred);
decl_check_bitfns(void checkasm_check_itx);
decl_check_bitfns(void checkasm_check_loopfilter);
decl_check_bitfns(void checkasm_check_looprestoration);
decl_check_bitfns(void checkasm_check_mc);
#ifdef BITDEPTH
/*
 * Pixel/coefficient convenience names, only available in translation units
 * compiled with BITDEPTH defined. They forward to checkasm_check*(),
 * BUF_RECT and CLEAR_BUF_RECT, parameterised by PIXEL_TYPE, COEF_TYPE and
 * the `pixel` type. None of those names is defined in this header.
 * NOTE(review): presumably they come from the bitdepth template header and
 * from the <checkasm/...> headers included above -- confirm the include order
 * in the template sources.
 */
#define checkasm_check_impl_pixel checkasm_check_impl(PIXEL_TYPE)
#define checkasm_check_pixel(...) checkasm_check(PIXEL_TYPE, __VA_ARGS__)
#define checkasm_check_coef(...) checkasm_check(COEF_TYPE, __VA_ARGS__)
#define PIXEL_RECT(...) BUF_RECT(pixel, __VA_ARGS__)
#define CLEAR_PIXEL_RECT CLEAR_BUF_RECT
#define checkasm_check_pixel_padded checkasm_check_rect_padded
#define checkasm_check_pixel_padded_align checkasm_check_rect_padded_align
#endif
#endif /* DAV1D_TESTS_CHECKASM_INTERNAL_H */
+1 -1
View File
@@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include "src/ipred.h" #include "src/ipred.h"
#include "src/levels.h" #include "src/levels.h"
+1 -1
View File
@@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include <math.h> #include <math.h>
-213
View File
@@ -1,213 +0,0 @@
/******************************************************************************
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2024, Loongson Technology Corporation Limited
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#define PRIVATE_PREFIX checkasm_
#include "src/loongarch/loongson_asm.S"
// Arbitrary 64-bit patterns used by checked_call to seed the callee-saved
// registers before the call and to verify them afterwards: quads 0..8 are
// loaded into s0..s8 and quads 9..16 into fs0..fs7. The 18th quad is not
// referenced by the code in this file.
const register_init, align=4
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
.quad 0x8bda43d3fd1a7e06
.quad 0xb64a9c9e5d318408
.quad 0xdf9a54b303f1d3a3
.quad 0x4a75479abd64e097
.quad 0x249214109d5d1c88
.quad 0x1a1b2550a612b48c
.quad 0x79445c159ce79064
.quad 0x2eed899d5a28ddcd
.quad 0x86b2536fcd8cf636
.quad 0xb0856806085e7943
.quad 0x3f2bf84fc0fcca4e
.quad 0xacbd382dcf5b8de2
.quad 0xd229e1f5b281303f
.quad 0x71aeaff20b095fd9
.quad 0xab63e2e11fa38ed9
endconst
// Message handed to puts() by checked_call when any of the seeded registers
// comes back with a different value.
const error_message
.asciz "failed to preserve register"
endconst
// max number of args used by any asm function.
#define MAX_ARGS 15
// Number of bytes to overwrite below the stack pointer: room for MAX_ARGS
// 8-byte slots, rounded up to a multiple of 16.
#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)
// Fill dirty data at stack space
//
// Overwrites the CLOBBER_STACK bytes directly below the incoming stack
// pointer with the values passed in a0 and a1, so that a function under test
// cannot rely on stale stack contents. sp is restored before returning.
function stack_clobber
move t0, sp                     // remember the caller's sp
addi.d t1, zero, CLOBBER_STACK  // bytes left to overwrite
1:
// Only touch memory strictly below sp: the slot at sp+0 belongs to the
// caller's frame and must not be overwritten.
st.d a0, sp, -0x08
st.d a1, sp, -0x10
addi.d sp, sp, -0x10
addi.d t1, t1, -0x10
blt zero, t1, 1b
move sp, t0                     // restore the caller's sp
endfunc
// Bytes reserved for the outgoing stack arguments (arguments 9..MAX_ARGS),
// rounded up to a multiple of 16.
#define ARG_STACK ((8*(MAX_ARGS - 8) + 15) & ~15)
// Call a function with all callee-saved registers seeded from register_init
// and verify afterwards that they were preserved.
//
// In:  a0 = function to call
//      a1 = pointer to the "ok" flag, zeroed when a register was modified
//      the arguments for the function are read from memory starting at the
//      incoming sp: slots 0..7 go to a0..a7, slots 8..MAX_ARGS-1 are copied
//      to the outgoing stack argument area.
// On a mismatch error_message is printed with puts().
// NOTE(review): puts() overwrites a0, so the tested function's return value
// is lost on the failure path; the flag is cleared with a 64-bit store --
// confirm the flag object is at least 8 bytes wide.
function checked_call
// Saved s0 - s8, fs0 - fs7
move t4, sp                     // t4 = incoming sp, base of the argument slots
// 17 registers * 8 = 136 bytes, padded to 144 so that sp stays 16-byte
// aligned (144 + 16 + ARG_STACK) when the tested function is entered.
addi.d sp, sp, -144
st.d s0, sp, 0
st.d s1, sp, 8
st.d s2, sp, 16
st.d s3, sp, 24
st.d s4, sp, 32
st.d s5, sp, 40
st.d s6, sp, 48
st.d s7, sp, 56
st.d s8, sp, 64
fst.d fs0, sp, 72
fst.d fs1, sp, 80
fst.d fs2, sp, 88
fst.d fs3, sp, 96
fst.d fs4, sp, 104
fst.d fs5, sp, 112
fst.d fs6, sp, 120
fst.d fs7, sp, 128
// Seed every callee-saved register with a known pattern
la.local t1, register_init
ld.d s0, t1, 0
ld.d s1, t1, 8
ld.d s2, t1, 16
ld.d s3, t1, 24
ld.d s4, t1, 32
ld.d s5, t1, 40
ld.d s6, t1, 48
ld.d s7, t1, 56
ld.d s8, t1, 64
fld.d fs0, t1, 72
fld.d fs1, t1, 80
fld.d fs2, t1, 88
fld.d fs3, t1, 96
fld.d fs4, t1, 104
fld.d fs5, t1, 112
fld.d fs6, t1, 120
fld.d fs7, t1, 128
addi.d sp, sp, -16
st.d a1, sp, 0 // ok
st.d ra, sp, 8 // Ret address
addi.d sp, sp, -ARG_STACK
addi.d t0, zero, 8*8            // source offset: first stack argument slot
xor t1, t1, t1                  // destination offset in the outgoing area
.rept MAX_ARGS - 8
// Skip the first 8 args, that are loaded into registers
ldx.d t2, t4, t0
stx.d t2, sp, t1
addi.d t0, t0, 8
addi.d t1, t1, 8
.endr
move t3, a0 // Func
ld.d a0, t4, 0
ld.d a1, t4, 8
ld.d a2, t4, 16
ld.d a3, t4, 24
ld.d a4, t4, 32
ld.d a5, t4, 40
ld.d a6, t4, 48
ld.d a7, t4, 56
jirl ra, t3, 0
addi.d sp, sp, ARG_STACK
ld.d t2, sp, 0 // ok
ld.d ra, sp, 8 // Ret address
addi.d sp, sp, 16
// Compare every seeded register against register_init; t3 accumulates the
// OR of all differences and stays zero when everything was preserved.
la.local t1, register_init
xor t3, t3, t3
.macro check_reg_gr reg1
ld.d t0, t1, 0
xor t0, $s\reg1, t0
or t3, t3, t0
addi.d t1, t1, 8
.endm
check_reg_gr 0
check_reg_gr 1
check_reg_gr 2
check_reg_gr 3
check_reg_gr 4
check_reg_gr 5
check_reg_gr 6
check_reg_gr 7
check_reg_gr 8
.macro check_reg_fr reg1
ld.d t0, t1, 0
movfr2gr.d t4, $fs\reg1
xor t0, t0, t4
or t3, t3, t0
addi.d t1, t1, 8
.endm
check_reg_fr 0
check_reg_fr 1
check_reg_fr 2
check_reg_fr 3
check_reg_fr 4
check_reg_fr 5
check_reg_fr 6
check_reg_fr 7
beqz t3, 0f
st.d zero, t2, 0x00 // Set OK to 0
la.local a0, error_message
// Keep ra across the call; the frame is 16 bytes so that sp stays aligned.
addi.d sp, sp, -16
st.d ra, sp, 0
bl puts
ld.d ra, sp, 0
addi.d sp, sp, 16
0:
// Restore the caller's callee-saved registers
ld.d s0, sp, 0
ld.d s1, sp, 8
ld.d s2, sp, 16
ld.d s3, sp, 24
ld.d s4, sp, 32
ld.d s5, sp, 40
ld.d s6, sp, 48
ld.d s7, sp, 56
ld.d s8, sp, 64
fld.d fs0, sp, 72
fld.d fs1, sp, 80
fld.d fs2, sp, 88
fld.d fs3, sp, 96
fld.d fs4, sp, 104
fld.d fs5, sp, 112
fld.d fs6, sp, 120
fld.d fs7, sp, 128
addi.d sp, sp, 144
endfunc
+1 -1
View File
@@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include <string.h> #include <string.h>
+1 -1
View File
@@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
+1 -1
View File
@@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include "src/levels.h" #include "src/levels.h"
#include "src/mc.h" #include "src/mc.h"
+1 -1
View File
@@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include "src/cpu.h" #include "src/cpu.h"
#include "src/msac.h" #include "src/msac.h"
+2 -1
View File
@@ -24,10 +24,11 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include "src/pal.h" #include "src/pal.h"
#include <stdio.h> #include <stdio.h>
#include <string.h>
static void check_pal_idx_finish(const Dav1dPalDSPContext *const c) { static void check_pal_idx_finish(const Dav1dPalDSPContext *const c) {
ALIGN_STK_64(uint8_t, src, 64 * 64,); ALIGN_STK_64(uint8_t, src, 64 * 64,);
+2 -1
View File
@@ -24,10 +24,11 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "tests/checkasm/checkasm.h" #include "tests/checkasm/internal.h"
#include "src/refmvs.h" #include "src/refmvs.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
static inline int gen_mv(const int total_bits, int spel_bits) { static inline int gen_mv(const int total_bits, int spel_bits) {
int bits = rnd() & ((1 << spel_bits) - 1); int bits = rnd() & ((1 << spel_bits) - 1);
-252
View File
@@ -1,252 +0,0 @@
/******************************************************************************
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2023, Nathan Egge
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#define PRIVATE_PREFIX checkasm_
#include "src/riscv/asm.S"
// max number of args used by any asm function.
#define MAX_ARGS 15
// Stack space reserved for the outgoing stack arguments, rounded up to a
// multiple of 16,
// + 16 for stack canary reference
// The rounding is parenthesised explicitly so that the value (80) does not
// depend on whether '&' or '+' binds tighter in the expression evaluator.
#define ARG_STACK (((8*(MAX_ARGS - 8) + 15) & ~15) + 16)
// Arbitrary 64-bit patterns used by checked_call. The first quad is the fill
// value for the vector registers and for the stack area below SP; s\n is
// seeded from offset 8 + 8*n, fs\n from 24 + 8*n, t\n from 16*n and ft\n from
// 16 + 16*n. The same offsets are used to verify s\n and fs\n after the call.
const register_init, align=4
.quad 0x68909d060f4a7fdd
.quad 0x924f739e310218a1
.quad 0xb988385a8254174c
.quad 0x4c1110430bf09fd7
.quad 0x2b310edf6a5d7ecf
.quad 0xda8112e98ddbb559
.quad 0x6da5854aa2f84b62
.quad 0x72b761199e9b1f38
.quad 0x13f27aa74ae5dcdf
.quad 0x36a6c12a7380e827
.quad 0x5c452889aefc8548
.quad 0x6a9ea1ddb236235f
.quad 0x0449854bdfc94b1e
.quad 0x4f849b7076a156f5
.quad 0x1baa4275e734930e
.quad 0x77df3503ba3e073d
.quad 0x6060e073705a4bf2
.quad 0xa7b482508471e44b
.quad 0xd296a3158d6da2b9
.quad 0x1c0ed711a93d970b
.quad 0x9359537fdd79569d
.quad 0x2b1dc95c1e232d62
.quad 0xab06cd578e2bb5a0
.quad 0x4100b4987a0af30f
.quad 0x2523e36f9bb1e36f
.quad 0xfb0b815930c6d25c
.quad 0x89acc810c2902fcf
.quad 0xa65854b4c2b381f1
.quad 0x78150d69a1accedf
.quad 0x057e24868e022de1
.quad 0x88f6e79ed4b8d362
.quad 0x1f4a420e262c9035
endconst
// Failure messages passed to checkasm_fail_func by checked_call. The %i in
// the two register messages receives the register index placed in a1.
const error_message_register
// sp, gp or tp differ from the values saved on entry
error_message_rsvd:
.asciz "unallocatable register clobbered"
// s0..s11 mismatch
error_message_sreg:
.asciz "callee-saved integer register s%i modified"
// fs0..fs11 mismatch (only checked with the double-float ABI)
error_message_fsreg:
.asciz "callee-saved floating-point register fs%i modified"
// stack canary mismatch
error_message_stack:
.asciz "stack clobbered"
endconst
/*
 * Per-thread save area used by checked_call. Layout in bytes:
 *   0 function pointer, 8 ra, 16 sp, 24 gp, 32 tp,
 *   40 + 16*n s\n, 48 + 16*n fs\n (n = 0..11)
 */
thread_local saved_regs, quads=29 # 5 + 12 + 12
/*
 * Call a function with clobbered temporaries, seeded callee-saved registers
 * and a stack canary, then verify that s0-s11, fs0-fs11 (double-float ABI
 * only), sp, gp, tp and the canary are intact.
 *
 * In: a0 = function to call. The arguments for that function are read from
 *     memory at the incoming sp: slots 0..7 are loaded into a0..a7, slots
 *     8..15 are copied to the outgoing stack area. Slot MAX_ARGS holds an
 *     index (in 8-byte units, relative to the outgoing area) of the word
 *     whose complement is used as stack canary.
 *     NOTE(review): presumably that index is the number of stack arguments
 *     actually passed -- confirm against the C caller.
 * On a mismatch checkasm_fail_func is called with a message and, for the
 * register checks, the register index in a1; execution then continues at the
 * common restore path.
 */
function checked_call, export=1, ext=v
/* Save the function ptr, RA, SP, unallocatable and callee-saved registers */
la.tls.ie t0, saved_regs
add t0, tp, t0
sd a0, (t0)
sd ra, 8(t0)
sd sp, 16(t0)
sd gp, 24(t0)
sd tp, 32(t0)
.irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
sd s\n, 40 + 16*\n(t0)
#ifdef __riscv_float_abi_double
fsd fs\n, 48 + 16*\n(t0)
#endif
.endr
/* Check for vector extension */
call dav1d_get_cpu_flags_riscv
and a0, a0, 1 # DAV1D_RISCV_CPU_FLAG_RVV
beqz a0, 0f
/* Clobber vector configuration */
vsetvli t0, zero, e32, m8, ta, ma
lla t0, register_init
ld t0, (t0)
/* With LMUL=8 each write covers a group of eight registers, so the four
 * groups v0, v8, v16 and v24 together clobber all 32 vector registers. */
.irp n, 0, 8, 16, 24
vmv.v.x v\n, t0
.endr
li t0, -1 << 31
vsetvl zero, zero, t0
csrwi vxrm, 3
csrwi vxsat, 1
0:
/* Load the register arguments */
.irp n, 0, 1, 2, 3, 4, 5, 6, 7
ld a\n, 8*\n(sp)
.endr
/* Load the stack arguments */
.irp n, 8, 9, 10, 11, 12, 13, 14, 15
ld t0, 8*\n(sp)
sd t0, 8*(\n - 8) - ARG_STACK(sp)
.endr
/* Setup the stack canary */
ld t0, MAX_ARGS*8(sp)
addi sp, sp, -ARG_STACK
slli t0, t0, 3
add t0, t0, sp
ld t0, (t0)
not t0, t0
sd t0, ARG_STACK - 8(sp)
/* Clobber the stack space right below SP */
lla t0, register_init
ld t1, (t0)
.rept 16
addi sp, sp, -16
sd t1, (sp)
sd t1, 8(sp)
.endr
addi sp, sp, 16*16
/* Clobber the callee-saved and temporary registers */
.irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
/* t0 is the table pointer and is therefore skipped here */
.if (\n > 0 && \n < 7)
ld t\n, 16*\n(t0)
.endif
ld s\n, 8 + 8*\n(t0)
#ifdef __riscv_float_abi_double
fld ft\n, 16 + 16*\n(t0)
fld fs\n, 24 + 8*\n(t0)
#endif
.endr
/* Call the checked function */
la.tls.ie t0, saved_regs
add t0, tp, t0
ld t0, (t0)
jalr t0
/* Check the value of callee-saved registers */
lla t0, register_init
.irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
ld t1, 8 + 8*\n(t0)
li a1, \n
bne t1, s\n, 2f
#ifdef __riscv_float_abi_double
ld t1, 24 + 8*\n(t0)
fmv.x.d t2, fs\n
bne t1, t2, 3f
#endif
.endr
/* Check unallocatable register values */
la.tls.ie t0, saved_regs
add t0, tp, t0
ld t1, 16(t0)
addi t1, t1, -ARG_STACK
bne t1, sp, 4f
ld t1, 24(t0)
bne t1, gp, 4f
ld t1, 32(t0)
bne t1, tp, 4f
/* Check the stack canary */
ld t0, ARG_STACK + MAX_ARGS*8(sp)
slli t0, t0, 3
add t0, t0, sp
ld t0, (t0)
not t0, t0
ld t1, ARG_STACK - 8(sp)
bne t0, t1, 5f
1:
/* Restore RA, SP and callee-saved registers from thread local storage */
la.tls.ie t0, saved_regs
add t0, tp, t0
ld ra, 8(t0)
ld sp, 16(t0)
.irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
ld s\n, 40 + 16*\n(t0)
#ifdef __riscv_float_abi_double
fld fs\n, 48 + 16*\n(t0)
#endif
.endr
ret
/* Failure paths: report, then resume at the common restore code above */
2:
lla a0, error_message_sreg
#ifdef PREFIX
call _checkasm_fail_func
#else
call checkasm_fail_func
#endif
j 1b
#ifdef __riscv_float_abi_double
3:
lla a0, error_message_fsreg
#ifdef PREFIX
call _checkasm_fail_func
#else
call checkasm_fail_func
#endif
j 1b
#endif
4:
lla a0, error_message_rsvd
#ifdef PREFIX
call _checkasm_fail_func
#else
call checkasm_fail_func
#endif
j 1b
5:
lla a0, error_message_stack
#ifdef PREFIX
call _checkasm_fail_func
#else
call checkasm_fail_func
#endif
j 1b
endfunc
-475
View File
@@ -1,475 +0,0 @@
; Copyright © 2018, VideoLAN and dav1d authors
; Copyright © 2018, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
; list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
; this list of conditions and the following disclaimer in the documentation
; and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%include "config.asm"
%undef private_prefix
%define private_prefix checkasm
%include "ext/x86/x86inc.asm"
; Read-only data for the checked_call wrappers.
; x6..x15 (WIN64 only) are the 128-bit patterns loaded into m6..m15 before the
; call and xor-compared afterwards. n7..n14 are the 64-bit patterns loaded
; into the general-purpose registers r7..r14 (WIN64) or r9..r14 (UNIX64).
SECTION_RODATA 16
%if ARCH_X86_64
; just random numbers to reduce the chance of incidental match
%if WIN64
x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064
x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
n7: dq 0x21f86d66c8ca00ce
n8: dq 0x75b6ba21077c48ad
%endif
n9: dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif
; Messages passed to fail_func; errmsg_register takes the list of register
; names built on the stack as its %s argument.
errmsg_stack: db "stack corruption", 0
errmsg_register: db "failed to preserve register:%s", 0
errmsg_vzeroupper: db "missing vzeroupper", 0
SECTION .bss
; Written by init_x86 with a non-zero value when the dirty-YMM-state check can
; be performed; read by checked_call, which skips that check while it is zero.
check_vzeroupper: resd 1
SECTION .text
cextern fail_func
; max number of args used by any asm function.
; (max_args % 4) must equal 3 for stack alignment
%define max_args 15
%if UNIX64
DECLARE_REG_TMP 0
%else
DECLARE_REG_TMP 4
%endif
;-----------------------------------------------------------------------------
; unsigned checkasm_init_x86(char *name)
;
; Writes a CPU identification string to name: the 48-byte processor brand
; string when cpuid leaf 0x80000004 is available, otherwise the 12-byte
; manufacturer id followed by a zero dword.
; Returns the cpuid leaf 1 signature (eax), or 0 when leaf 1 is not supported.
; Also sets check_vzeroupper when xgetbv with ecx=1 is usable and reports a
; clean ymm state at this point.
;-----------------------------------------------------------------------------
cglobal init_x86, 0, 5
%if ARCH_X86_64
push rbx ; cpuid overwrites ebx
%endif
movifnidn t0, r0mp
mov eax, 0x80000000
cpuid
cmp eax, 0x80000004
jb .no_brand ; processor brand string not supported
mov eax, 0x80000002
cpuid
mov [t0+4* 0], eax
mov [t0+4* 1], ebx
mov [t0+4* 2], ecx
mov [t0+4* 3], edx
mov eax, 0x80000003
cpuid
mov [t0+4* 4], eax
mov [t0+4* 5], ebx
mov [t0+4* 6], ecx
mov [t0+4* 7], edx
mov eax, 0x80000004
cpuid
mov [t0+4* 8], eax
mov [t0+4* 9], ebx
mov [t0+4*10], ecx
mov [t0+4*11], edx
; leaf 0: eax = highest supported standard leaf
xor eax, eax
cpuid
jmp .check_xcr1
.no_brand: ; use manufacturer id as a fallback
xor eax, eax
mov [t0+4*3], eax ; zero dword after the 12 id bytes
cpuid
mov [t0+4*0], ebx
mov [t0+4*1], edx
mov [t0+4*2], ecx
.check_xcr1:
test eax, eax
jz .end2 ; cpuid leaf 1 not supported
mov t0d, eax ; max leaf
mov eax, 1
cpuid
and ecx, 0x18000000
cmp ecx, 0x18000000
jne .end2 ; osxsave/avx not supported
cmp t0d, 13 ; cpuid leaf 13 not supported
jb .end2
mov t0d, eax ; cpuid signature
mov eax, 13
mov ecx, 1
cpuid
test al, 0x04
jz .end ; xcr1 not supported
mov ecx, 1
xgetbv
test al, 0x04
jnz .end ; always-dirty ymm state
; ecx is still 1 here, so this enables the check in checked_call
%if ARCH_X86_64 == 0 && PIC
LEA eax, check_vzeroupper
mov [eax], ecx
%else
mov [check_vzeroupper], ecx
%endif
.end:
mov eax, t0d ; return the cpuid signature
.end2:
%if ARCH_X86_64
pop rbx
%endif
RET
%if ARCH_X86_64
; Per-ABI parameters for the 64-bit checked_call:
;   stack_param            address of the first outgoing stack parameter
;   num_fn_args            address of the argument count on the incoming stack
;   num_reg_args           number of arguments passed in registers
;   free_regs              index of the first register that is seeded and
;                          verified (r<free_regs>..r14)
;   clobber_mask_stack_bit clobber-mask bit tested for stack parameters
%if WIN64
%define stack_param rsp+32 ; shadow space
%define num_fn_args rsp+stack_offset+17*8
%assign num_reg_args 4
%assign free_regs 7
%assign clobber_mask_stack_bit 16
DECLARE_REG_TMP 4
%else
%define stack_param rsp
%define num_fn_args rsp+stack_offset+11*8
%assign num_reg_args 6
%assign free_regs 9
%assign clobber_mask_stack_bit 64
DECLARE_REG_TMP 7
%endif
; If mask_bit is set in r9b, replace the upper 32 bits of reg with the
; pattern held in r8 while keeping the lower 32 bits; otherwise leave reg
; untouched. Uses r13 as scratch.
%macro CLOBBER_UPPER 2 ; reg, mask_bit
mov r13d, %1d ; zero-extended low half
or r13, r8
test r9b, %2
cmovnz %1, r13
%endmacro
; 64-bit wrapper that calls a function and afterwards checks for stack
; corruption, modified callee-saved registers and (when enabled by init_x86)
; a dirty ymm state. On a failed check fail_func is called with a message; the
; return value in rdx:rax of the called function is preserved in either case.
; The argument count, the function pointer and the clobber mask are read from
; the incoming stack relative to num_fn_args.
; NOTE(review): the exact argument layout is produced by the C-side call
; macro, which is not visible here -- confirm before changing any offset.
cglobal checked_call, 2, 15, 16, max_args*8+64+8
mov r10d, [num_fn_args]
mov r8, 0xdeadbeef00000000 ; upper-half pattern used by CLOBBER_UPPER
mov r9d, [num_fn_args+r10*8+8] ; clobber_mask
mov t0, [num_fn_args+r10*8] ; func
; Clobber the upper halves of 32-bit parameters
CLOBBER_UPPER r0, 1
CLOBBER_UPPER r1, 2
CLOBBER_UPPER r2, 4
CLOBBER_UPPER r3, 8
%if UNIX64
CLOBBER_UPPER r4, 16
CLOBBER_UPPER r5, 32
%else ; WIN64
; seed xmm6-xmm15 with known patterns
%assign i 6
%rep 16-6
mova m %+ i, [x %+ i]
%assign i i+1
%endrep
%endif
xor r11d, r11d
sub r10d, num_reg_args
cmovs r10d, r11d ; num stack args
; write stack canaries to the area above parameters passed on the stack
mov r12, [rsp+stack_offset] ; return address
not r12
%assign i 0
%rep 8 ; 64 bytes
mov [stack_param+(r10+i)*8], r12
%assign i i+1
%endrep
test r10d, r10d
jz .stack_setup_done ; no stack parameters
.copy_stack_parameter:
; copy one stack parameter, optionally clobbering its upper half; the mask is
; shifted right once per parameter so the same bit position is tested each time
mov r12, [stack_param+stack_offset+8+r11*8]
CLOBBER_UPPER r12, clobber_mask_stack_bit
shr r9d, 1
mov [stack_param+r11*8], r12
inc r11d
cmp r11d, r10d
jl .copy_stack_parameter
.stack_setup_done:
; seed r14 down to r<free_regs> with known patterns
%assign i 14
%rep 15-free_regs
mov r %+ i, [n %+ i]
%assign i i-1
%endrep
call t0
; check for stack corruption
mov r0d, [num_fn_args]
xor r3d, r3d
sub r0d, num_reg_args
cmovs r0d, r3d ; num stack args
mov r3, [rsp+stack_offset]
mov r4, [stack_param+r0*8]
not r3 ; expected canary value
xor r4, r3
%assign i 1
%rep 6
mov r5, [stack_param+(r0+i)*8]
xor r5, r3
or r4, r5
%assign i i+1
%endrep
xor r3, [stack_param+(r0+7)*8]
or r4, r3
jz .stack_ok
; Save the return value located in rdx:rax first to prevent clobbering.
mov r10, rax
mov r11, rdx
lea r0, [errmsg_stack]
jmp .fail
.stack_ok:
; check for failure to preserve registers
; r3 and r4 are both zero here (the or above produced zero), so r3 collects
; one mismatch bit per register; bit (i - free_regs) ends up describing r<i>
%assign i 14
%rep 15-free_regs
cmp r %+ i, [n %+ i]
setne r4b
lea r3d, [r4+r3*2]
%assign i i-1
%endrep
%if WIN64
lea r0, [rsp+32] ; account for shadow space
mov r5, r0 ; start of the name buffer, compared against r0 below
test r3d, r3d
jz .gpr_ok
%else
test r3d, r3d
jz .gpr_xmm_ok
mov r0, rsp
%endif
; build the list of modified register names at [r0]; every name is written
; unconditionally but the write pointer only advances when its bit is set
%assign i free_regs
%rep 15-free_regs
%if i < 10
mov dword [r0], " r0" + (i << 16)
lea r4, [r0+3]
%else
mov dword [r0], " r10" + ((i - 10) << 24)
lea r4, [r0+4]
%endif
test r3b, 1 << (i - free_regs)
cmovnz r0, r4
%assign i i+1
%endrep
%if WIN64 ; xmm registers
.gpr_ok:
; xor every xmm register with its expected pattern and pack the results
; down to a byte mask in r3d; 0xffff means that nothing differed
%assign i 6
%rep 16-6
pxor m %+ i, [x %+ i]
%assign i i+1
%endrep
packsswb m6, m7
packsswb m8, m9
packsswb m10, m11
packsswb m12, m13
packsswb m14, m15
packsswb m6, m6
packsswb m8, m10
packsswb m12, m14
packsswb m6, m6
packsswb m8, m12
packsswb m6, m8
pxor m7, m7
pcmpeqb m6, m7
pmovmskb r3d, m6
cmp r3d, 0xffff
je .xmm_ok
mov r7d, " xmm"
; append " xmmN" names; the write pointer advances when bit i is clear
%assign i 6
%rep 16-6
mov [r0+0], r7d
%if i < 10
mov byte [r0+4], "0" + i
lea r4, [r0+5]
%else
mov word [r0+4], "10" + ((i - 10) << 8)
lea r4, [r0+6]
%endif
test r3d, 1 << i
cmovz r0, r4
%assign i i+1
%endrep
.xmm_ok:
cmp r0, r5
je .gpr_xmm_ok ; nothing was appended to the buffer
mov byte [r0], 0 ; terminate the name list
mov r11, rdx
mov r1, r5
%else
mov byte [r0], 0 ; terminate the name list
mov r11, rdx
mov r1, rsp
%endif
mov r10, rax
lea r0, [errmsg_register]
jmp .fail
.gpr_xmm_ok:
; Check for dirty YMM state, i.e. missing vzeroupper
mov ecx, [check_vzeroupper]
test ecx, ecx
jz .ok ; not supported, skip
mov r10, rax
mov r11, rdx
xgetbv
test al, 0x04
jz .restore_retval ; clean ymm state
lea r0, [errmsg_vzeroupper]
vzeroupper
.fail:
; Call fail_func() with a descriptive message to mark it as a failure.
; NOTE(review): eax is zeroed before the call, presumably because fail_func
; is variadic -- confirm against its prototype.
xor eax, eax
call fail_func
.restore_retval:
mov rax, r10
mov rdx, r11
.ok:
RET
; trigger a warmup of vector units
; The macro emits a function named warmup that executes two vector
; instructions on m0 and returns; it is instantiated once for AVX2 (ymm) and
; once for AVX-512 (zmm).
%macro WARMUP 0
cglobal warmup, 0, 0
xorps m0, m0
mulps m0, m0
RET
%endmacro
INIT_YMM avx2
WARMUP
INIT_ZMM avx512
WARMUP
%else
; just random numbers to reduce the chance of incidental match
; (patterns loaded into r3..r6 before the call and compared afterwards)
%assign n3 0x6549315c
%assign n4 0xe02f3e23
%assign n5 0xb78d0d1d
%assign n6 0x33627ba7
;-----------------------------------------------------------------------------
; void checkasm_checked_call(void *func, ...)
;
; 32-bit wrapper: pushes stack canaries and copies of the stack parameters,
; calls func with r3..r6 seeded, then checks the seeded registers, the
; canaries and (when enabled by init_x86) the ymm state. On a failed check
; fail_func is called with a message. The return value in edx:eax of the
; called function is preserved.
;-----------------------------------------------------------------------------
cglobal checked_call, 1, 7
mov r3, [esp+stack_offset] ; return address
mov r1, [esp+stack_offset+17*4] ; num_stack_params
mov r2, 27
not r3 ; canary value = complemented return address
sub r2, r1 ; canaries + parameters always add up to 27 dwords
.push_canary:
push r3
dec r2
jg .push_canary
.push_parameter:
push dword [esp+32*4]
dec r1
jg .push_parameter
mov r3, n3
mov r4, n4
mov r5, n5
mov r6, n6
call r0
; check for failure to preserve registers
; one mismatch flag per byte of r3, tested below as 1 << ((6 - i) * 8)
cmp r3, n3
setne r3h
cmp r4, n4
setne r3b
shl r3d, 16
cmp r5, n5
setne r3h
cmp r6, n6
setne r3b
test r3, r3
jz .gpr_ok
lea r1, [esp+16]
mov [esp+4], r1 ; second argument for fail_func: the name buffer
; build the list of modified register names; every name is written
; unconditionally but the write pointer only advances when its flag is set
%assign i 3
%rep 4
mov dword [r1], " r0" + (i << 16)
lea r4, [r1+3]
test r3, 1 << ((6 - i) * 8)
cmovnz r1, r4
%assign i i+1
%endrep
mov byte [r1], 0 ; terminate the name list
mov r5, eax ; keep the return value in r6:r5
mov r6, edx
LEA r1, errmsg_register
jmp .fail
.gpr_ok:
; check for stack corruption
mov r3, [esp+48*4] ; num_stack_params
mov r6, [esp+31*4] ; return address
mov r4, [esp+r3*4]
sub r3, 26
not r6
xor r4, r6
.check_canary:
mov r5, [esp+(r3+27)*4]
xor r5, r6
or r4, r5
inc r3
jl .check_canary
mov r5, eax ; keep the return value in r6:r5
mov r6, edx
test r4, r4
jz .stack_ok
LEA r1, errmsg_stack
jmp .fail
.stack_ok:
; check for dirty YMM state, i.e. missing vzeroupper
LEA ecx, check_vzeroupper
mov ecx, [ecx]
test ecx, ecx
jz .ok ; not supported, skip
xgetbv
test al, 0x04
jz .ok ; clean ymm state
LEA r1, errmsg_vzeroupper
vzeroupper
.fail:
mov [esp], r1 ; first argument for fail_func: the message
call fail_func
.ok:
add esp, 27*4 ; drop the canaries and parameter copies
mov eax, r5
mov edx, r6
RET
%endif ; ARCH_X86_64
%endif ; ARCH_X86_64
+10 -27
View File
@@ -31,7 +31,13 @@ if not get_option('enable_tests')
subdir_done() subdir_done()
endif endif
if is_asm_enabled checkasm_dependency = dependency('checkasm',
fallback: ['checkasm', 'checkasm_dep'],
required: false
)
if is_asm_enabled and checkasm_dependency.found()
checkasm_sources = files( checkasm_sources = files(
'checkasm/checkasm.c', 'checkasm/checkasm.c',
'checkasm/msac.c', 'checkasm/msac.c',
@@ -55,7 +61,7 @@ if is_asm_enabled
'checkasm_bitdepth_@0@'.format(bitdepth), 'checkasm_bitdepth_@0@'.format(bitdepth),
checkasm_tmpl_sources, checkasm_tmpl_sources,
include_directories: dav1d_inc_dirs, include_directories: dav1d_inc_dirs,
dependencies : [stdatomic_dependencies], dependencies : [stdatomic_dependencies, checkasm_dependency],
c_args: ['-DBITDEPTH=@0@'.format(bitdepth)], c_args: ['-DBITDEPTH=@0@'.format(bitdepth)],
install: false, install: false,
build_by_default: false, build_by_default: false,
@@ -63,29 +69,8 @@ if is_asm_enabled
checkasm_bitdepth_objs += checkasm_bitdepth_lib.extract_all_objects(recursive: true) checkasm_bitdepth_objs += checkasm_bitdepth_lib.extract_all_objects(recursive: true)
endforeach endforeach
checkasm_asm_objs = []
checkasm_asm_sources = []
if host_machine.cpu_family() == 'aarch64' or host_machine.cpu() == 'arm64'
checkasm_asm_sources += files('checkasm/arm/checkasm_64.S')
elif host_machine.cpu_family().startswith('arm')
checkasm_asm_sources += files('checkasm/arm/checkasm_32.S')
elif host_machine.cpu_family() == 'riscv64'
checkasm_asm_sources += files('checkasm/riscv/checkasm_64.S')
elif host_machine.cpu_family().startswith('x86')
checkasm_asm_objs += nasm_gen.process(files('checkasm/x86/checkasm.asm'))
elif host_machine.cpu_family().startswith('loongarch')
checkasm_asm_sources += files('checkasm/loongarch/checkasm.S')
endif
if use_gaspp
checkasm_asm_objs += gaspp_gen.process(checkasm_asm_sources)
else
checkasm_sources += checkasm_asm_sources
endif
checkasm = executable('checkasm', checkasm = executable('checkasm',
checkasm_sources, checkasm_sources,
checkasm_asm_objs,
objects: [ objects: [
checkasm_bitdepth_objs, checkasm_bitdepth_objs,
@@ -94,11 +79,9 @@ if is_asm_enabled
include_directories: dav1d_inc_dirs, include_directories: dav1d_inc_dirs,
build_by_default: false, build_by_default: false,
dependencies : [ dependencies: [
thread_dependency,
rt_dependency,
libdl_dependency,
libm_dependency, libm_dependency,
checkasm_dependency,
], ],
) )