swscale/aarch64/ops: simplify process function generation

There was no good reason to model the process function as an SwsAArch64OpType; the four variants are now generated directly and selected by plane count.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Ramiro Polla <ramiro.polla@gmail.com>
This commit is contained in:
Ramiro Polla
2026-06-10 01:47:11 +02:00
parent 19250a1846
commit 2576e09434
6 changed files with 30 additions and 70 deletions
+12 -10
View File
@@ -222,22 +222,24 @@ static int aarch64_compile(SwsContext *ctx, const SwsOpList *ops,
}
/* Look up process function. */
void ff_sws_process_0001_neon(void);
void ff_sws_process_0011_neon(void);
void ff_sws_process_0111_neon(void);
void ff_sws_process_1111_neon(void);
const SwsOp *read = ff_sws_op_list_input(&rest);
const SwsOp *write = ff_sws_op_list_output(&rest);
const int read_planes = read ? (read->rw.packed ? 1 : read->rw.elems) : 0;
const int write_planes = write->rw.packed ? 1 : write->rw.elems;
SwsAArch64OpMask mask = 0;
for (int i = 0; i < FFMAX(read_planes, write_planes); i++)
MASK_SET(mask, i, 1);
SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask };
SwsFuncPtr process_func = ff_sws_aarch64_lookup(&process_params);
if (!process_func) {
ret = AVERROR(ENOTSUP);
goto error;
SwsOpFunc process_func = NULL;
switch (FFMAX(read_planes, write_planes)) {
case 1: process_func = (SwsOpFunc) ff_sws_process_0001_neon; break;
case 2: process_func = (SwsOpFunc) ff_sws_process_0011_neon; break;
case 3: process_func = (SwsOpFunc) ff_sws_process_0111_neon; break;
case 4: process_func = (SwsOpFunc) ff_sws_process_1111_neon; break;
}
out->func = (SwsOpFunc) process_func;
out->func = process_func;
out->cpu_flags = chain->cpu_flags;
error:
+18 -24
View File
@@ -272,12 +272,12 @@ static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count,
}
static unsigned clobbered_gprs(const SwsAArch64Context *s,
const SwsAArch64OpImplParams *p,
SwsAArch64OpMask mask,
RasmOp regs[MAX_SAVED_REGS])
{
unsigned count = 0;
clobber_gpr(regs, &count, a64op_lr());
LOOP_MASK(p, i) {
LOOP(mask, i) {
clobber_gpr(regs, &count, s->in[i]);
clobber_gpr(regs, &count, s->out[i]);
clobber_gpr(regs, &count, s->in_bump[i]);
@@ -286,7 +286,7 @@ static unsigned clobbered_gprs(const SwsAArch64Context *s,
return count;
}
static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static void asmgen_process(SwsAArch64Context *s, SwsAArch64OpMask mask)
{
RasmContext *r = s->rctx;
char func_name[128];
@@ -297,13 +297,13 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p
* The description in x86/ops_include.asm mostly holds as well here.
*/
aarch64_op_impl_func_name(func_name, sizeof(func_name), p);
snprintf(func_name, sizeof(func_name), "ff_sws_process_%04x_neon", mask);
rasm_func_begin(r, func_name, true, false);
/* Function prologue */
RasmOp saved_regs[MAX_SAVED_REGS];
unsigned nsaved = clobbered_gprs(s, p, saved_regs);
unsigned nsaved = clobbered_gprs(s, mask, saved_regs);
if (nsaved)
asmgen_prologue(s, saved_regs, nsaved);
@@ -312,19 +312,19 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p
i_add(r, s->op1_impl, s->impl, IMM(sizeof_impl)); CMT("SwsOpImpl *op1_impl = impl + 1;");
/* Load values from exec. */
LOOP_MASK(p, i) {
LOOP(mask, i) {
rasm_annotate_nextf(r, buf, sizeof(buf), "in[%u] = exec->in[%u];", i, i);
i_ldr(r, s->in[i], a64op_off(s->exec, offsetof_exec_in + (i * sizeof(uint8_t *))));
}
LOOP_MASK(p, i) {
LOOP(mask, i) {
rasm_annotate_nextf(r, buf, sizeof(buf), "out[%u] = exec->out[%u];", i, i);
i_ldr(r, s->out[i], a64op_off(s->exec, offsetof_exec_out + (i * sizeof(uint8_t *))));
}
LOOP_MASK(p, i) {
LOOP(mask, i) {
rasm_annotate_nextf(r, buf, sizeof(buf), "in_bump[%u] = exec->in_bump[%u];", i, i);
i_ldr(r, s->in_bump[i], a64op_off(s->exec, offsetof_exec_in_bump + (i * sizeof(ptrdiff_t))));
}
LOOP_MASK(p, i) {
LOOP(mask, i) {
rasm_annotate_nextf(r, buf, sizeof(buf), "out_bump[%u] = exec->out_bump[%u];", i, i);
i_ldr(r, s->out_bump[i], a64op_off(s->exec, offsetof_exec_out_bump + (i * sizeof(ptrdiff_t))));
}
@@ -338,8 +338,8 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p
/* Perform padding, preparing for next row. */
rasm_add_label(r, next_row); CMT("next_row:");
LOOP_MASK(p, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); }
LOOP_MASK(p, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); }
LOOP(mask, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); }
LOOP(mask, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); }
/* First row (reset x). */
rasm_add_label(r, first_row); CMT("first_row:");
@@ -1438,18 +1438,6 @@ static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
}
}
static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
{
switch (p->op) {
case AARCH64_SWS_OP_PROCESS:
asmgen_process(s, p);
break;
default:
asmgen_op_cps(s, p);
break;
}
}
/*********************************************************************/
static void aarch64_op_impl_lookup_str(char *buf, size_t size, const SwsAArch64OpImplParams *params,
const SwsAArch64OpImplParams *prev, const char *p_str)
@@ -1641,10 +1629,16 @@ static int asmgen(void)
s.in_bump [3] = a64op_gpx(26);
s.out_bump[3] = a64op_gpx(27);
/* Generate all process functions using rasm. */
asmgen_process(&s, 0x0001);
asmgen_process(&s, 0x0011);
asmgen_process(&s, 0x0111);
asmgen_process(&s, 0x1111);
/* Generate all functions from ops_entries.c using rasm. */
const SwsAArch64OpImplParams *params = impl_params;
while (params->op) {
asmgen_op(&s, params++);
asmgen_op_cps(&s, params++);
if (rctx->error) {
ret = rctx->error;
goto error;
-4
View File
@@ -3,10 +3,6 @@
* To regenerate, run: make sws_ops_entries_aarch64
*/
{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0001 },
{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0011 },
{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0111 },
{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x1111 },
{ .op = AARCH64_SWS_OP_READ_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
{ .op = AARCH64_SWS_OP_READ_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
{ .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
-3
View File
@@ -76,7 +76,6 @@ static const char *aarch64_pixel_type_name(SwsAArch64PixelType fmt)
/*********************************************************************/
static const char op_types[AARCH64_SWS_OP_TYPE_NB][32] = {
[AARCH64_SWS_OP_NONE ] = "AARCH64_SWS_OP_NONE",
[AARCH64_SWS_OP_PROCESS ] = "AARCH64_SWS_OP_PROCESS",
[AARCH64_SWS_OP_READ_BIT ] = "AARCH64_SWS_OP_READ_BIT",
[AARCH64_SWS_OP_READ_NIBBLE ] = "AARCH64_SWS_OP_READ_NIBBLE",
[AARCH64_SWS_OP_READ_PACKED ] = "AARCH64_SWS_OP_READ_PACKED",
@@ -112,7 +111,6 @@ static const char *aarch64_op_type(SwsAArch64OpType op)
static const char op_type_names[AARCH64_SWS_OP_TYPE_NB][16] = {
[AARCH64_SWS_OP_NONE ] = "none",
[AARCH64_SWS_OP_PROCESS ] = "process",
[AARCH64_SWS_OP_READ_BIT ] = "read_bit",
[AARCH64_SWS_OP_READ_NIBBLE ] = "read_nibble",
[AARCH64_SWS_OP_READ_PACKED ] = "read_packed",
@@ -323,7 +321,6 @@ static const ParamField field_dither_size_log2 = { PARAM_FIELD(dither.size_log2)
/* Fields needed to uniquely identify each SwsAArch64OpType. */
#define MAX_LEVELS 8
static const ParamField *op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS] = {
[AARCH64_SWS_OP_PROCESS ] = { &field_op, &field_mask },
[AARCH64_SWS_OP_READ_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask },
[AARCH64_SWS_OP_READ_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask },
[AARCH64_SWS_OP_READ_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask },
-1
View File
@@ -37,7 +37,6 @@ typedef enum SwsAArch64PixelType {
/* Similar to SwsOpType */
typedef enum SwsAArch64OpType {
AARCH64_SWS_OP_NONE = 0,
AARCH64_SWS_OP_PROCESS,
AARCH64_SWS_OP_READ_BIT,
AARCH64_SWS_OP_READ_NIBBLE,
AARCH64_SWS_OP_READ_PACKED,
-28
View File
@@ -72,30 +72,6 @@ error:
return ret;
}
/* Collect the parameters for the process function. */
static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **root)
{
const SwsOp *read = ff_sws_op_list_input(ops);
const SwsOp *write = ff_sws_op_list_output(ops);
const int read_planes = read ? (read->rw.packed ? 1 : read->rw.elems) : 0;
const int write_planes = write->rw.packed ? 1 : write->rw.elems;
int ret;
SwsAArch64OpMask mask = 0;
for (int i = 0; i < FFMAX(read_planes, write_planes); i++)
MASK_SET(mask, i, 1);
SwsAArch64OpImplParams params = {
.op = AARCH64_SWS_OP_PROCESS,
.mask = mask,
};
ret = aarch64_collect_op(&params, root);
if (ret < 0)
return ret;
return 0;
}
static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops)
{
struct AVTreeNode **root = (struct AVTreeNode **) opaque;
@@ -106,10 +82,6 @@ static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops)
/* Use at most two full vregs during the widest precision section */
int block_size = (ff_sws_op_list_max_size(ops) == 4) ? 8 : 16;
ret = aarch64_collect_process(&rest, root);
if (ret < 0)
return ret;
for (int i = 0; i < rest.num_ops; i++) {
SwsAArch64OpImplParams params = { 0 };
ret = convert_to_aarch64_impl(ctx, &rest, i, block_size, &params);