x86: Add high bitdepth filmgrain AVX2 asm

This commit is contained in:
Ronald S. Bultje
2021-05-10 20:41:23 +02:00
committed by Henrik Gramner
co-authored by Henrik Gramner
parent 1ac1abc8eb
commit 3a6630707e
3 changed files with 2108 additions and 1 deletion
+1
View File
@@ -214,6 +214,7 @@ if is_asm_enabled
if dav1d_bitdepths.contains('16')
libdav1d_sources_asm += files(
'x86/cdef16_avx2.asm',
'x86/film_grain16_avx2.asm',
'x86/ipred16_avx2.asm',
'x86/itx16_avx2.asm',
'x86/loopfilter16_avx2.asm',
File diff suppressed because it is too large. Load Diff
+15 -1
View File
@@ -46,6 +46,11 @@ decl_fguv_32x32xn_fn(dav1d_fguv_32x32xn_i420_avx2);
decl_fguv_32x32xn_fn(dav1d_fguv_32x32xn_i422_avx2);
decl_fguv_32x32xn_fn(dav1d_fguv_32x32xn_i444_avx2);
decl_generate_grain_y_fn(dav1d_generate_grain_y_16bpc_avx2);
decl_generate_grain_uv_fn(dav1d_generate_grain_uv_420_16bpc_avx2);
decl_fgy_32x32xn_fn(dav1d_fgy_32x32xn_16bpc_avx2);
decl_fguv_32x32xn_fn(dav1d_fguv_32x32xn_i420_16bpc_avx2);
COLD void bitfn(dav1d_film_grain_dsp_init_x86)(Dav1dFilmGrainDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
@@ -62,9 +67,10 @@ COLD void bitfn(dav1d_film_grain_dsp_init_x86)(Dav1dFilmGrainDSPContext *const c
c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = dav1d_fguv_32x32xn_i444_ssse3;
#endif
#if ARCH_X86_64
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
#if BITDEPTH == 8 && ARCH_X86_64
#if BITDEPTH == 8
c->generate_grain_y = dav1d_generate_grain_y_avx2;
c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_generate_grain_uv_420_avx2;
c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_generate_grain_uv_422_avx2;
@@ -73,5 +79,13 @@ COLD void bitfn(dav1d_film_grain_dsp_init_x86)(Dav1dFilmGrainDSPContext *const c
c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_fguv_32x32xn_i420_avx2;
c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_fguv_32x32xn_i422_avx2;
c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = dav1d_fguv_32x32xn_i444_avx2;
#else
c->generate_grain_y = dav1d_generate_grain_y_16bpc_avx2;
c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] =
dav1d_generate_grain_uv_420_16bpc_avx2;
c->fgy_32x32xn = dav1d_fgy_32x32xn_16bpc_avx2;
c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] =
dav1d_fguv_32x32xn_i420_16bpc_avx2;
#endif
#endif
}