Add SSSE3 HBD filmgrain assembly optimizations

This commit is contained in:
Ronald S. Bultje
2021-06-15 09:49:02 -04:00
parent ddbbfde198
commit af16b652aa
3 changed files with 2207 additions and 0 deletions
+1
View File
@@ -224,6 +224,7 @@ if is_asm_enabled
'x86/looprestoration16_avx2.asm',
'x86/mc16_avx2.asm',
'x86/cdef16_sse.asm',
'x86/film_grain16_sse.asm',
'x86/itx16_sse.asm',
'x86/loopfilter16_sse.asm',
'x86/looprestoration16_sse.asm',
File diff suppressed because it is too large. Load Diff
+14
View File
@@ -51,6 +51,11 @@ decl_generate_grain_uv_fn(dav1d_generate_grain_uv_420_16bpc_avx2);
decl_fgy_32x32xn_fn(dav1d_fgy_32x32xn_16bpc_avx2);
decl_fguv_32x32xn_fn(dav1d_fguv_32x32xn_i420_16bpc_avx2);
/* 16 bpc SSSE3 film grain entry points: luma/chroma grain generation and the
 * 32x32 luma/chroma application functions. Only the 4:2:0 chroma variants are
 * declared in this group.
 * NOTE(review): presumably implemented in x86/film_grain16_sse.asm, which the
 * same commit adds to the build list -- confirm against the assembly file. */
decl_generate_grain_y_fn(dav1d_generate_grain_y_16bpc_ssse3);
decl_generate_grain_uv_fn(dav1d_generate_grain_uv_420_16bpc_ssse3);
decl_fgy_32x32xn_fn(dav1d_fgy_32x32xn_16bpc_ssse3);
decl_fguv_32x32xn_fn(dav1d_fguv_32x32xn_i420_16bpc_ssse3);
COLD void bitfn(dav1d_film_grain_dsp_init_x86)(Dav1dFilmGrainDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
@@ -65,6 +70,15 @@ COLD void bitfn(dav1d_film_grain_dsp_init_x86)(Dav1dFilmGrainDSPContext *const c
c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_fguv_32x32xn_i420_ssse3;
c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_fguv_32x32xn_i422_ssse3;
c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = dav1d_fguv_32x32xn_i444_ssse3;
#else
#if ARCH_X86_64
c->generate_grain_y = dav1d_generate_grain_y_16bpc_ssse3;
c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] =
dav1d_generate_grain_uv_420_16bpc_ssse3;
c->fgy_32x32xn = dav1d_fgy_32x32xn_16bpc_ssse3;
c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] =
dav1d_fguv_32x32xn_i420_16bpc_ssse3;
#endif
#endif
#if ARCH_X86_64