mirror of
https://code.videolan.org/videolan/dav1d
synced 2026-06-11 04:03:05 +00:00
riscv64/itx: Add 4x8 8bpc RVV transforms
inv_txfm_add_4x8_adst_adst_0_8bpc_c: 1619.6 ( 1.00x) inv_txfm_add_4x8_adst_adst_0_8bpc_rvv: 198.6 ( 8.16x) inv_txfm_add_4x8_adst_adst_1_8bpc_c: 1621.5 ( 1.00x) inv_txfm_add_4x8_adst_adst_1_8bpc_rvv: 198.5 ( 8.17x) inv_txfm_add_4x8_adst_dct_0_8bpc_c: 1496.1 ( 1.00x) inv_txfm_add_4x8_adst_dct_0_8bpc_rvv: 175.1 ( 8.54x) inv_txfm_add_4x8_adst_dct_1_8bpc_c: 1496.3 ( 1.00x) inv_txfm_add_4x8_adst_dct_1_8bpc_rvv: 175.1 ( 8.55x) inv_txfm_add_4x8_adst_flipadst_0_8bpc_c: 1624.8 ( 1.00x) inv_txfm_add_4x8_adst_flipadst_0_8bpc_rvv: 200.6 ( 8.10x) inv_txfm_add_4x8_adst_flipadst_1_8bpc_c: 1623.9 ( 1.00x) inv_txfm_add_4x8_adst_flipadst_1_8bpc_rvv: 200.6 ( 8.10x) inv_txfm_add_4x8_adst_identity_0_8bpc_c: 1132.3 ( 1.00x) inv_txfm_add_4x8_adst_identity_0_8bpc_rvv: 122.6 ( 9.24x) inv_txfm_add_4x8_adst_identity_1_8bpc_c: 1132.2 ( 1.00x) inv_txfm_add_4x8_adst_identity_1_8bpc_rvv: 122.6 ( 9.23x) inv_txfm_add_4x8_dct_adst_0_8bpc_c: 1561.5 ( 1.00x) inv_txfm_add_4x8_dct_adst_0_8bpc_rvv: 192.3 ( 8.12x) inv_txfm_add_4x8_dct_adst_1_8bpc_c: 1563.9 ( 1.00x) inv_txfm_add_4x8_dct_adst_1_8bpc_rvv: 192.3 ( 8.13x) inv_txfm_add_4x8_dct_dct_0_8bpc_c: 260.9 ( 1.00x) inv_txfm_add_4x8_dct_dct_0_8bpc_rvv: 168.9 ( 1.55x) inv_txfm_add_4x8_dct_dct_1_8bpc_c: 1443.6 ( 1.00x) inv_txfm_add_4x8_dct_dct_1_8bpc_rvv: 168.9 ( 8.55x) inv_txfm_add_4x8_dct_flipadst_0_8bpc_c: 1567.5 ( 1.00x) inv_txfm_add_4x8_dct_flipadst_0_8bpc_rvv: 194.3 ( 8.07x) inv_txfm_add_4x8_dct_flipadst_1_8bpc_c: 1565.8 ( 1.00x) inv_txfm_add_4x8_dct_flipadst_1_8bpc_rvv: 194.3 ( 8.06x) inv_txfm_add_4x8_dct_identity_0_8bpc_c: 1073.8 ( 1.00x) inv_txfm_add_4x8_dct_identity_0_8bpc_rvv: 116.4 ( 9.23x) inv_txfm_add_4x8_dct_identity_1_8bpc_c: 1074.4 ( 1.00x) inv_txfm_add_4x8_dct_identity_1_8bpc_rvv: 116.3 ( 9.23x) inv_txfm_add_4x8_flipadst_adst_0_8bpc_c: 1631.1 ( 1.00x) inv_txfm_add_4x8_flipadst_adst_0_8bpc_rvv: 200.6 ( 8.13x) inv_txfm_add_4x8_flipadst_adst_1_8bpc_c: 1631.1 ( 1.00x) inv_txfm_add_4x8_flipadst_adst_1_8bpc_rvv: 200.6 ( 8.13x) 
inv_txfm_add_4x8_flipadst_dct_0_8bpc_c: 1507.0 ( 1.00x) inv_txfm_add_4x8_flipadst_dct_0_8bpc_rvv: 177.1 ( 8.51x) inv_txfm_add_4x8_flipadst_dct_1_8bpc_c: 1506.3 ( 1.00x) inv_txfm_add_4x8_flipadst_dct_1_8bpc_rvv: 177.1 ( 8.50x) inv_txfm_add_4x8_flipadst_flipadst_0_8bpc_c: 1633.9 ( 1.00x) inv_txfm_add_4x8_flipadst_flipadst_0_8bpc_rvv: 202.5 ( 8.07x) inv_txfm_add_4x8_flipadst_flipadst_1_8bpc_c: 1633.7 ( 1.00x) inv_txfm_add_4x8_flipadst_flipadst_1_8bpc_rvv: 202.5 ( 8.07x) inv_txfm_add_4x8_flipadst_identity_0_8bpc_c: 1142.7 ( 1.00x) inv_txfm_add_4x8_flipadst_identity_0_8bpc_rvv: 123.2 ( 9.27x) inv_txfm_add_4x8_flipadst_identity_1_8bpc_c: 1142.6 ( 1.00x) inv_txfm_add_4x8_flipadst_identity_1_8bpc_rvv: 123.2 ( 9.27x) inv_txfm_add_4x8_identity_adst_0_8bpc_c: 1442.0 ( 1.00x) inv_txfm_add_4x8_identity_adst_0_8bpc_rvv: 168.9 ( 8.54x) inv_txfm_add_4x8_identity_adst_1_8bpc_c: 1442.8 ( 1.00x) inv_txfm_add_4x8_identity_adst_1_8bpc_rvv: 168.9 ( 8.54x) inv_txfm_add_4x8_identity_dct_0_8bpc_c: 1322.7 ( 1.00x) inv_txfm_add_4x8_identity_dct_0_8bpc_rvv: 146.7 ( 9.02x) inv_txfm_add_4x8_identity_dct_1_8bpc_c: 1320.9 ( 1.00x) inv_txfm_add_4x8_identity_dct_1_8bpc_rvv: 146.7 ( 9.00x) inv_txfm_add_4x8_identity_flipadst_0_8bpc_c: 1451.0 ( 1.00x) inv_txfm_add_4x8_identity_flipadst_0_8bpc_rvv: 171.0 ( 8.48x) inv_txfm_add_4x8_identity_flipadst_1_8bpc_c: 1450.0 ( 1.00x) inv_txfm_add_4x8_identity_flipadst_1_8bpc_rvv: 171.0 ( 8.48x) inv_txfm_add_4x8_identity_identity_0_8bpc_c: 977.1 ( 1.00x) inv_txfm_add_4x8_identity_identity_0_8bpc_rvv: 93.9 (10.41x) inv_txfm_add_4x8_identity_identity_1_8bpc_c: 976.9 ( 1.00x) inv_txfm_add_4x8_identity_identity_1_8bpc_rvv: 93.9 (10.41x)
This commit is contained in:
@@ -725,6 +725,125 @@ def_fn_8x8 flipadst, identity
|
||||
def_fn_8x8 identity, adst
|
||||
def_fn_8x8 identity, flipadst
|
||||
|
||||
/*
 * inv_txfm_add_4x8_rvv
 *
 * Common two-pass body that the per-transform 4x8 entry points jump to.
 *
 * Register use, as read from the code below:
 *   a0  address of the first of eight 4-byte rows that are loaded,
 *       updated and stored back (presumably the destination pixels;
 *       confirm against the C prototype)
 *   a1  byte distance between consecutive rows at a0
 *   a2  buffer of 32 16-bit values (presumably the coefficients); it is
 *       reused as scratch for the transpose and is all zero on return
 *   a4  address of the first-pass routine, called with the return
 *       address in t0 while v0-v3 hold eight 16-bit lanes each
 *   a5  address of the second-pass routine, called with the return
 *       address in t0 while v0-v7 hold four 16-bit lanes each
 *
 * Modifies a0, a2, t0, t1, vl/vtype, vxrm and v0-v15, in addition to
 * whatever the two routines behind a4/a5 modify.
 */
function inv_txfm_add_4x8_rvv, export=1, ext=v
  /* vxrm = 0: the fixed-point instructions below round to nearest-up */
  csrw vxrm, zero

  /* Load the 32 input values as four vectors of eight 16-bit lanes */
  vsetivli zero, 8, e16, m1, ta, ma
  vle16.v v0, (a2)
  addi t0, a2, 16
  vle16.v v1, (t0)
  addi t0, t0, 16
  vle16.v v2, (t0)
  addi t0, t0, 16
  vle16.v v3, (t0)

  /* vsmul shifts the product right by 15, so this scales every lane by
   * 2896*8/32768 = 2896/4096 (about 1/sqrt(2)) */
  li t1, 2896*8
.irp i, 0, 1, 2, 3
  vsmul.vx v\i, v\i, t1
.endr

  /* First pass; the routine has to return through t0 */
  jalr t0, a4

  /* Segment store interleaves v0-v3 element by element, which leaves
   * the transposed data (eight groups of four values) in the buffer */
  vsseg4e16.v v0, (a2)

  /* Read the eight 4-lane rows back into v0-v7 and overwrite each one
   * with zeros as soon as it has been loaded */
  vsetivli zero, 4, e16, mf2, ta, ma
  vmv.v.x v8, zero
  vle16.v v0, (a2)
  vse16.v v8, (a2)
.irp i, 1, 2, 3, 4, 5, 6, 7
  addi a2, a2, 8
  vle16.v v\i, (a2)
  vse16.v v8, (a2)
.endr

  /* Second pass; the routine has to return through t0 */
  jalr t0, a5

  /* Rounding arithmetic shift right by 4 (rounding mode set via vxrm) */
.irp i, 0, 1, 2, 3, 4, 5, 6, 7
  vssra.vi v\i, v\i, 4
.endr

  /* Same vl, 8-bit lanes: fetch the eight 4-byte rows at a0 into v8-v15 */
  vsetvli zero, zero, e8, mf4, ta, ma
  vle8.v v8, (a0)
  add t0, a0, a1
  vle8.v v9, (t0)
.irp i, 10, 11, 12, 13, 14, 15
  add t0, t0, a1
  vle8.v v\i, (t0)
.endr

  /* 16-bit result += zero-extended 8-bit row */
  vwaddu.wv v0, v0, v8
  vwaddu.wv v1, v1, v9
  vwaddu.wv v2, v2, v10
  vwaddu.wv v3, v3, v11
  vwaddu.wv v4, v4, v12
  vwaddu.wv v5, v5, v13
  vwaddu.wv v6, v6, v14
  vwaddu.wv v7, v7, v15

  /* Clamp negative sums to zero first: vnclipu below treats its source
   * as unsigned and would otherwise saturate them to the maximum */
  vsetvli zero, zero, e16, mf2, ta, ma
.irp i, 0, 1, 2, 3, 4, 5, 6, 7
  vmax.vx v\i, v\i, zero
.endr

  vsetvli zero, zero, e8, mf4, ta, ma

  /* Narrow 16 -> 8 bits with unsigned saturation (shift amount 0) */
  vnclipu.wi v8, v0, 0
  vnclipu.wi v9, v1, 0
  vnclipu.wi v10, v2, 0
  vnclipu.wi v11, v3, 0
  vnclipu.wi v12, v4, 0
  vnclipu.wi v13, v5, 0
  vnclipu.wi v14, v6, 0
  vnclipu.wi v15, v7, 0

  /* Write the eight rows back, advancing a0 by a1 per row */
  vse8.v v8, (a0)
.irp i, 9, 10, 11, 12, 13, 14, 15
  add a0, a0, a1
  vse8.v v\i, (a0)
.endr

  ret
endfunc
|
||||
|
||||
/*
 * Numeric tags for the transform names.  The macros below compare a
 * transform-name argument with `.if \txfm1 == adst`, and the assembler
 * can only evaluate that when each name is a symbol with an absolute
 * value.
 */
.set dct,      1
.set identity, 2
.set adst,     3
.set flipadst, 4
|
||||
|
||||
/*
 * def_fn_48 w, h, txfm1, txfm2
 *
 * Emits the exported entry point
 *   inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_8bpc_rvv
 * which only picks the two 1-D routines and then jumps to the shared
 * body inv_txfm_add_<w>x<h>_rvv:
 *   a5 = inv_<txfm2>_e16_x<h>_rvv
 *   a4 = inv_<txfm1>_e16_x<w>_rvv, or the "w"-suffixed symbol
 *        inv_<txfm1>_e16_x<w>w_rvv when txfm1 is adst or flipadst
 *
 * txfm1 is compared numerically, so it has to be one of the transform
 * names defined as assembler symbols in this file (dct, identity, adst,
 * flipadst).
 */
.macro def_fn_48 w, h, txfm1, txfm2
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_rvv, export=1
  la a5, inv_\txfm2\()_e16_x\h\()_rvv
.if (\txfm1 != adst) && (\txfm1 != flipadst)
  la a4, inv_\txfm1\()_e16_x\w\()_rvv
.else
  la a4, inv_\txfm1\()_e16_x\w\()w_rvv
.endif
  j inv_txfm_add_\w\()x\h\()_rvv
endfunc
.endm
|
||||
|
||||
/*
 * def_fns_48 w, h
 *
 * Instantiates def_fn_48 once for every pairing of dct, adst, flipadst
 * and identity as first and second transform (16 entry points).
 */
.macro def_fns_48 w, h
.irp txfm1, dct, adst, flipadst, identity
.irp txfm2, dct, adst, flipadst, identity
def_fn_48 \w, \h, \txfm1, \txfm2
.endr
.endr
.endm

def_fns_48 4, 8
|
||||
|
||||
function inv_identity_e16_x16_rvv, export=1, ext=v
|
||||
li t1, 2*(5793-4096)*8
|
||||
.irp i, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
||||
@@ -58,6 +58,7 @@ decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
|
||||
|
||||
/* Expands to decl_itx17_fns for 4x4 and decl_itx16_fns for 4x8, 8x8 and
 * 16x16, each passed the given ext argument. */
#define decl_itx_fns(ext) \
|
||||
decl_itx17_fns( 4, 4, ext); \
|
||||
decl_itx16_fns( 4, 8, ext); \
|
||||
decl_itx16_fns( 8, 8, ext); \
|
||||
decl_itx16_fns(16, 16, ext)
|
||||
|
||||
@@ -105,6 +106,7 @@ static ALWAYS_INLINE void itx_dsp_init_riscv(Dav1dInvTxfmDSPContext *const c, in
|
||||
|
||||
#if BITDEPTH == 8
|
||||
assign_itx17_fn( , 4, 4, rvv);
|
||||
assign_itx16_fn(R, 4, 8, rvv);
|
||||
assign_itx16_fn( , 8, 8, rvv);
|
||||
assign_itx12_fn( , 16, 16, rvv);
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user