mirror of
https://code.videolan.org/videolan/dav1d
synced 2026-06-11 04:03:05 +00:00
loongarch: opt inv_txfm_add_identity_identity_8x32_8bpc_lsx
Relative speedup over C code:
inv_txfm_add_8x32_identity_identity_0_8bpc_c: 126.1 ( 1.00x)
inv_txfm_add_8x32_identity_identity_0_8bpc_lsx: 1.6 (78.59x)
inv_txfm_add_8x32_identity_identity_1_8bpc_c: 136.9 ( 1.00x)
inv_txfm_add_8x32_identity_identity_1_8bpc_lsx: 1.6 (85.31x)
inv_txfm_add_8x32_identity_identity_2_8bpc_c: 148.0 ( 1.00x)
inv_txfm_add_8x32_identity_identity_2_8bpc_lsx: 3.3 (45.47x)
inv_txfm_add_8x32_identity_identity_3_8bpc_c: 159.4 ( 1.00x)
inv_txfm_add_8x32_identity_identity_3_8bpc_lsx: 4.9 (32.78x)
inv_txfm_add_8x32_identity_identity_4_8bpc_c: 170.2 ( 1.00x)
inv_txfm_add_8x32_identity_identity_4_8bpc_lsx: 6.5 (26.17x)

Change-Id: Iabda6efcd8a17d26a205f90757dfea85af48848f
This commit is contained in:
@@ -9479,6 +9479,45 @@ function inv_txfm_add_dct_dct_8x32_8bpc_lsx
|
||||
.DCT_DCT_8X32_END:
|
||||
endfunc
|
||||
|
||||
// LSX implementation of the 8x32 identity/identity inverse transform + add
// (8 bpc). Works in a loop: each pass loads eight coefficient vectors,
// clears them in memory, applies two rounding shifts around an 8x8
// transpose, and hands the result to VLD_DST_ADD_W8 in two groups of four.
//
// NOTE(review): register roles are not declared in this block. From usage,
// a0/a1 look like the destination pointer and its stride, a2 the coefficient
// buffer and a3 the eob value -- confirm against the itx prototype.
// Scalar temporaries used: t2, t6, t7. Vector registers used: vr0-vr23.
function inv_txfm_add_identity_identity_8x32_8bpc_lsx
|
||||
|
||||
// t7 = address of the eob_8x32 table (defined outside this block).
la.local t7, eob_8x32
|
||||
// t2 = a0 + (a1 << 1). t2 is not read by any instruction visible here;
// presumably VLD_DST_ADD_W8 uses it as a second row pointer -- TODO confirm.
alsl.d t2, a1, a0, 1
|
||||
|
||||
.IDENTITY_IDENTITY_EOB_8x32:
|
||||
// Fetch the next 16-bit table entry into t6 (used by the loop test at the
// bottom) and step the table pointer to the following entry.
ld.h t6, t7, 0
|
||||
addi.d t7, t7, 2
|
||||
// vld_x8 is a macro defined elsewhere; presumably loads vr0-vr7 from a2 at
// offsets 0, 64, 128, ... (start 0, step 64) -- TODO confirm.
vld_x8 a2, 0, 64, vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7
|
||||
|
||||
// vr23 = all-zero halfwords, then written back through vst_x8 with the same
// base/offset/step arguments as the load above (presumably zeroing the
// coefficients that were just read -- confirm macro semantics).
vreplgr2vr.h vr23, zero
|
||||
vst_x8 a2, 0, 64, vr23, vr23, vr23, vr23, vr23, vr23, vr23, vr23
|
||||
|
||||
// First scaling step: rounding arithmetic right shift by 1 on every 16-bit
// lane of vr0-vr7.
.irp i, vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7
|
||||
vsrari.h \i, \i, 1
|
||||
.endr
|
||||
|
||||
// LSX_TRANSPOSE8x8_H is a macro defined elsewhere; by its name and argument
// layout it transposes the 8x8 halfword matrix vr0-vr7 into vr16-vr23 with
// vr8-vr15 as scratch -- TODO confirm. Note vr23 (the zero vector above) is
// overwritten here and re-zeroed at the top of the next pass.
LSX_TRANSPOSE8x8_H vr0, vr1, vr2, vr3, vr4, vr5, vr6, vr7, \
|
||||
vr16, vr17, vr18, vr19, vr20, vr21, vr22, vr23, \
|
||||
vr8, vr9, vr10, vr11, vr12, vr13, vr14, vr15
|
||||
|
||||
// Second scaling step: rounding arithmetic right shift by 2 on every 16-bit
// lane of vr16-vr23.
.irp i, vr16, vr17, vr18, vr19, vr20, vr21, vr22, vr23
|
||||
vsrari.h \i, \i, 2
|
||||
.endr
|
||||
|
||||
// VLD_DST_ADD_W8 is a macro defined elsewhere; presumably it loads four
// 8-pixel destination rows, adds vr16-vr19, and stores the result -- TODO
// confirm.
VLD_DST_ADD_W8 vr16, vr17, vr18, vr19
|
||||
|
||||
// Advance a0 by 4 * a1 and recompute t2 = a0 + 2 * a1 for the next group.
alsl.d a0, a1, a0, 2
|
||||
alsl.d t2, a1, a0, 1
|
||||
|
||||
// Same macro for the remaining four vectors, vr20-vr23.
VLD_DST_ADD_W8 vr20, vr21, vr22, vr23
|
||||
|
||||
// Advance a0 by another 4 * a1 (8 * a1 per loop pass in total) and refresh t2.
alsl.d a0, a1, a0, 2
|
||||
alsl.d t2, a1, a0, 1
|
||||
|
||||
// Step a2 forward by 16 bytes for the next pass.
addi.d a2, a2, 16
|
||||
// Repeat while a3 >= t6 (signed compare against the table entry loaded at
// the top of this pass). Termination relies on the contents of eob_8x32,
// which are not visible here.
bge a3, t6, .IDENTITY_IDENTITY_EOB_8x32
|
||||
// No explicit return instruction appears in this block; presumably the
// endfunc macro emits it -- TODO confirm.
endfunc
|
||||
|
||||
function inv_txfm_add_dct_dct_16x8_8bpc_lsx
|
||||
bnez a3, .NO_HAS_DCONLY_16x8
|
||||
|
||||
|
||||
@@ -145,6 +145,7 @@ decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_16x16, lsx));
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_16x16, lsx));
|
||||
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_8x32, lsx));
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_8x32, lsx));
|
||||
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_16x32, lsx));
|
||||
|
||||
@@ -276,6 +277,7 @@ static ALWAYS_INLINE void itx_dsp_init_loongarch(Dav1dInvTxfmDSPContext *const c
|
||||
c->itxfm_add[TX_16X16][FLIPADST_DCT] = dav1d_inv_txfm_add_dct_flipadst_16x16_8bpc_lsx;
|
||||
|
||||
c->itxfm_add[RTX_8X32][DCT_DCT] = dav1d_inv_txfm_add_dct_dct_8x32_8bpc_lsx;
|
||||
c->itxfm_add[RTX_8X32][IDTX] = dav1d_inv_txfm_add_identity_identity_8x32_8bpc_lsx;
|
||||
|
||||
c->itxfm_add[RTX_16X32][DCT_DCT] = dav1d_inv_txfm_add_dct_dct_16x32_8bpc_lsx;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user