loongarch: optimize inv_txfm_add_identity_identity_8x32_8bpc_lsx

Relative speedup over C code:

inv_txfm_add_8x32_identity_identity_0_8bpc_c:       126.1 ( 1.00x)
inv_txfm_add_8x32_identity_identity_0_8bpc_lsx:       1.6 (78.59x)
inv_txfm_add_8x32_identity_identity_1_8bpc_c:       136.9 ( 1.00x)
inv_txfm_add_8x32_identity_identity_1_8bpc_lsx:       1.6 (85.31x)
inv_txfm_add_8x32_identity_identity_2_8bpc_c:       148.0 ( 1.00x)
inv_txfm_add_8x32_identity_identity_2_8bpc_lsx:       3.3 (45.47x)
inv_txfm_add_8x32_identity_identity_3_8bpc_c:       159.4 ( 1.00x)
inv_txfm_add_8x32_identity_identity_3_8bpc_lsx:       4.9 (32.78x)
inv_txfm_add_8x32_identity_identity_4_8bpc_c:       170.2 ( 1.00x)
inv_txfm_add_8x32_identity_identity_4_8bpc_lsx:       6.5 (26.17x)

Change-Id: Iabda6efcd8a17d26a205f90757dfea85af48848f
This commit is contained in:
yuanhecai
2024-09-30 06:37:00 +00:00
committed by Hecai Yuan
parent 5de878a4e1
commit f6ffdc90b3
2 changed files with 41 additions and 0 deletions
+39
View File
@@ -9479,6 +9479,45 @@ function inv_txfm_add_dct_dct_8x32_8bpc_lsx
.DCT_DCT_8X32_END:
endfunc
// 8x32 identity/identity inverse transform and add, 8 bpc, LoongArch LSX.
//
// NOTE(review): the register contract is not declared in this block. From the
// way the registers are used below it looks like:
//   a0 = destination pointer, a1 = destination stride (bytes),
//   a2 = coefficient buffer,  a3 = eob
// Confirm against the itxfm_add callback prototype.
//
// Scratch registers written here: t2, t6, t7, vr0-vr23. The argument
// registers a0 and a2 are advanced in place.
//
// The body is a single loop. Each pass handles one group of eight vectors
// addressed through a2, then compares a3 against the next 16-bit entry of
// the eob_8x32 table to decide whether another pass is needed.
function inv_txfm_add_identity_identity_8x32_8bpc_lsx
// t7 = address of the eob_8x32 threshold table (defined outside this block).
la.local t7, eob_8x32
// t2 = a0 + (a1 << 1), i.e. two strides past a0.
// Presumably consumed implicitly by VLD_DST_ADD_W8 -- TODO confirm.
alsl.d t2, a1, a0, 1
.IDENTITY_IDENTITY_EOB_8x32:
// t6 = next signed 16-bit threshold; step the table pointer to the following entry.
ld.h t6, t7, 0
addi.d t7, t7, 2
// Presumably loads eight vectors from a2 at byte offsets 0, 64, ..., 448
// (macro body not visible here -- TODO confirm).
vld_x8 a2, 0, 64, vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7
// vr23 = all-zero halfwords, then written back over the same eight locations,
// presumably to clear the coefficients that were just read.
vreplgr2vr.h vr23, zero
vst_x8 a2, 0, 64, vr23, vr23, vr23, vr23, vr23, vr23, vr23, vr23
// Rounding arithmetic shift right by 1 on every halfword lane of vr0-vr7.
.irp i, vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7
vsrari.h \i, \i, 1
.endr
// 8x8 halfword transpose. Going by argument order: inputs vr0-vr7, results in
// vr16-vr23, vr8-vr15 as temporaries (macro not visible -- TODO confirm).
// vr23 is listed among the results, so its zero value from above is only
// needed for the vst_x8 that precedes this point.
LSX_TRANSPOSE8x8_H vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7, \
vr16, vr17, vr18, vr19, vr20, vr21, vr22, vr23, \
vr8, vr9, vr10, vr11, vr12, vr13, vr14, vr15
// Rounding arithmetic shift right by 2 on every halfword lane of vr16-vr23.
.irp i, vr16, vr17, vr18, vr19, vr20, vr21, vr22, vr23
vsrari.h \i, \i, 2
.endr
// Presumably adds four residual rows to the destination rows reached through
// a0 and t2 and stores the result (macro not visible -- TODO confirm).
VLD_DST_ADD_W8 vr16, vr17, vr18, vr19
// a0 += 4 * a1, then recompute t2 = a0 + 2 * a1 for the next group of rows.
alsl.d a0, a1, a0, 2
alsl.d t2, a1, a0, 1
VLD_DST_ADD_W8 vr20, vr21, vr22, vr23
// Step a0 and t2 forward by another four strides so they are ready for the next pass.
alsl.d a0, a1, a0, 2
alsl.d t2, a1, a0, 1
// Advance the coefficient pointer by 16 bytes (one 128-bit vector).
addi.d a2, a2, 16
// Signed compare: loop again while a3 >= the threshold loaded into t6.
// NOTE(review): termination depends on eob_8x32 ending with a value larger
// than any a3 the caller can pass; the table is not visible here -- confirm.
bge a3, t6, .IDENTITY_IDENTITY_EOB_8x32
// NOTE(review): no explicit return instruction appears in this body;
// presumably the endfunc macro emits it -- confirm in the shared asm header.
endfunc
function inv_txfm_add_dct_dct_16x8_8bpc_lsx
bnez a3, .NO_HAS_DCONLY_16x8
+2
View File
@@ -145,6 +145,7 @@ decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_16x16, lsx));
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_16x16, lsx));
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_8x32, lsx));
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_8x32, lsx));
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_16x32, lsx));
@@ -276,6 +277,7 @@ static ALWAYS_INLINE void itx_dsp_init_loongarch(Dav1dInvTxfmDSPContext *const c
c->itxfm_add[TX_16X16][FLIPADST_DCT] = dav1d_inv_txfm_add_dct_flipadst_16x16_8bpc_lsx;
c->itxfm_add[RTX_8X32][DCT_DCT] = dav1d_inv_txfm_add_dct_dct_8x32_8bpc_lsx;
c->itxfm_add[RTX_8X32][IDTX] = dav1d_inv_txfm_add_identity_identity_8x32_8bpc_lsx;
c->itxfm_add[RTX_16X32][DCT_DCT] = dav1d_inv_txfm_add_dct_dct_16x32_8bpc_lsx;