mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-11 08:13:06 +00:00
swscale/aarch64/ops: simplify process function generation
There was no good reason to have the process function as an SwsAArch64OpType.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Ramiro Polla <ramiro.polla@gmail.com>
This commit is contained in:
+12
-10
@@ -222,22 +222,24 @@ static int aarch64_compile(SwsContext *ctx, const SwsOpList *ops,
|
||||
}
|
||||
|
||||
/* Look up process function. */
|
||||
void ff_sws_process_0001_neon(void);
|
||||
void ff_sws_process_0011_neon(void);
|
||||
void ff_sws_process_0111_neon(void);
|
||||
void ff_sws_process_1111_neon(void);
|
||||
|
||||
const SwsOp *read = ff_sws_op_list_input(&rest);
|
||||
const SwsOp *write = ff_sws_op_list_output(&rest);
|
||||
const int read_planes = read ? (read->rw.packed ? 1 : read->rw.elems) : 0;
|
||||
const int write_planes = write->rw.packed ? 1 : write->rw.elems;
|
||||
SwsAArch64OpMask mask = 0;
|
||||
for (int i = 0; i < FFMAX(read_planes, write_planes); i++)
|
||||
MASK_SET(mask, i, 1);
|
||||
|
||||
SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask };
|
||||
SwsFuncPtr process_func = ff_sws_aarch64_lookup(&process_params);
|
||||
if (!process_func) {
|
||||
ret = AVERROR(ENOTSUP);
|
||||
goto error;
|
||||
SwsOpFunc process_func = NULL;
|
||||
switch (FFMAX(read_planes, write_planes)) {
|
||||
case 1: process_func = (SwsOpFunc) ff_sws_process_0001_neon; break;
|
||||
case 2: process_func = (SwsOpFunc) ff_sws_process_0011_neon; break;
|
||||
case 3: process_func = (SwsOpFunc) ff_sws_process_0111_neon; break;
|
||||
case 4: process_func = (SwsOpFunc) ff_sws_process_1111_neon; break;
|
||||
}
|
||||
|
||||
out->func = (SwsOpFunc) process_func;
|
||||
out->func = process_func;
|
||||
out->cpu_flags = chain->cpu_flags;
|
||||
|
||||
error:
|
||||
|
||||
@@ -272,12 +272,12 @@ static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count,
|
||||
}
|
||||
|
||||
static unsigned clobbered_gprs(const SwsAArch64Context *s,
|
||||
const SwsAArch64OpImplParams *p,
|
||||
SwsAArch64OpMask mask,
|
||||
RasmOp regs[MAX_SAVED_REGS])
|
||||
{
|
||||
unsigned count = 0;
|
||||
clobber_gpr(regs, &count, a64op_lr());
|
||||
LOOP_MASK(p, i) {
|
||||
LOOP(mask, i) {
|
||||
clobber_gpr(regs, &count, s->in[i]);
|
||||
clobber_gpr(regs, &count, s->out[i]);
|
||||
clobber_gpr(regs, &count, s->in_bump[i]);
|
||||
@@ -286,7 +286,7 @@ static unsigned clobbered_gprs(const SwsAArch64Context *s,
|
||||
return count;
|
||||
}
|
||||
|
||||
static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
|
||||
static void asmgen_process(SwsAArch64Context *s, SwsAArch64OpMask mask)
|
||||
{
|
||||
RasmContext *r = s->rctx;
|
||||
char func_name[128];
|
||||
@@ -297,13 +297,13 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p
|
||||
* The description in x86/ops_include.asm mostly holds as well here.
|
||||
*/
|
||||
|
||||
aarch64_op_impl_func_name(func_name, sizeof(func_name), p);
|
||||
snprintf(func_name, sizeof(func_name), "ff_sws_process_%04x_neon", mask);
|
||||
|
||||
rasm_func_begin(r, func_name, true, false);
|
||||
|
||||
/* Function prologue */
|
||||
RasmOp saved_regs[MAX_SAVED_REGS];
|
||||
unsigned nsaved = clobbered_gprs(s, p, saved_regs);
|
||||
unsigned nsaved = clobbered_gprs(s, mask, saved_regs);
|
||||
if (nsaved)
|
||||
asmgen_prologue(s, saved_regs, nsaved);
|
||||
|
||||
@@ -312,19 +312,19 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p
|
||||
i_add(r, s->op1_impl, s->impl, IMM(sizeof_impl)); CMT("SwsOpImpl *op1_impl = impl + 1;");
|
||||
|
||||
/* Load values from exec. */
|
||||
LOOP_MASK(p, i) {
|
||||
LOOP(mask, i) {
|
||||
rasm_annotate_nextf(r, buf, sizeof(buf), "in[%u] = exec->in[%u];", i, i);
|
||||
i_ldr(r, s->in[i], a64op_off(s->exec, offsetof_exec_in + (i * sizeof(uint8_t *))));
|
||||
}
|
||||
LOOP_MASK(p, i) {
|
||||
LOOP(mask, i) {
|
||||
rasm_annotate_nextf(r, buf, sizeof(buf), "out[%u] = exec->out[%u];", i, i);
|
||||
i_ldr(r, s->out[i], a64op_off(s->exec, offsetof_exec_out + (i * sizeof(uint8_t *))));
|
||||
}
|
||||
LOOP_MASK(p, i) {
|
||||
LOOP(mask, i) {
|
||||
rasm_annotate_nextf(r, buf, sizeof(buf), "in_bump[%u] = exec->in_bump[%u];", i, i);
|
||||
i_ldr(r, s->in_bump[i], a64op_off(s->exec, offsetof_exec_in_bump + (i * sizeof(ptrdiff_t))));
|
||||
}
|
||||
LOOP_MASK(p, i) {
|
||||
LOOP(mask, i) {
|
||||
rasm_annotate_nextf(r, buf, sizeof(buf), "out_bump[%u] = exec->out_bump[%u];", i, i);
|
||||
i_ldr(r, s->out_bump[i], a64op_off(s->exec, offsetof_exec_out_bump + (i * sizeof(ptrdiff_t))));
|
||||
}
|
||||
@@ -338,8 +338,8 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p
|
||||
|
||||
/* Perform padding, preparing for next row. */
|
||||
rasm_add_label(r, next_row); CMT("next_row:");
|
||||
LOOP_MASK(p, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); }
|
||||
LOOP_MASK(p, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); }
|
||||
LOOP(mask, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); }
|
||||
LOOP(mask, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); }
|
||||
|
||||
/* First row (reset x). */
|
||||
rasm_add_label(r, first_row); CMT("first_row:");
|
||||
@@ -1438,18 +1438,6 @@ static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
|
||||
}
|
||||
}
|
||||
|
||||
static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
|
||||
{
|
||||
switch (p->op) {
|
||||
case AARCH64_SWS_OP_PROCESS:
|
||||
asmgen_process(s, p);
|
||||
break;
|
||||
default:
|
||||
asmgen_op_cps(s, p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*********************************************************************/
|
||||
static void aarch64_op_impl_lookup_str(char *buf, size_t size, const SwsAArch64OpImplParams *params,
|
||||
const SwsAArch64OpImplParams *prev, const char *p_str)
|
||||
@@ -1641,10 +1629,16 @@ static int asmgen(void)
|
||||
s.in_bump [3] = a64op_gpx(26);
|
||||
s.out_bump[3] = a64op_gpx(27);
|
||||
|
||||
/* Generate all process functions using rasm. */
|
||||
asmgen_process(&s, 0x0001);
|
||||
asmgen_process(&s, 0x0011);
|
||||
asmgen_process(&s, 0x0111);
|
||||
asmgen_process(&s, 0x1111);
|
||||
|
||||
/* Generate all functions from ops_entries.c using rasm. */
|
||||
const SwsAArch64OpImplParams *params = impl_params;
|
||||
while (params->op) {
|
||||
asmgen_op(&s, params++);
|
||||
asmgen_op_cps(&s, params++);
|
||||
if (rctx->error) {
|
||||
ret = rctx->error;
|
||||
goto error;
|
||||
|
||||
@@ -3,10 +3,6 @@
|
||||
* To regenerate, run: make sws_ops_entries_aarch64
|
||||
*/
|
||||
|
||||
{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0001 },
|
||||
{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0011 },
|
||||
{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0111 },
|
||||
{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x1111 },
|
||||
{ .op = AARCH64_SWS_OP_READ_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
|
||||
{ .op = AARCH64_SWS_OP_READ_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
|
||||
{ .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
|
||||
|
||||
@@ -76,7 +76,6 @@ static const char *aarch64_pixel_type_name(SwsAArch64PixelType fmt)
|
||||
/*********************************************************************/
|
||||
static const char op_types[AARCH64_SWS_OP_TYPE_NB][32] = {
|
||||
[AARCH64_SWS_OP_NONE ] = "AARCH64_SWS_OP_NONE",
|
||||
[AARCH64_SWS_OP_PROCESS ] = "AARCH64_SWS_OP_PROCESS",
|
||||
[AARCH64_SWS_OP_READ_BIT ] = "AARCH64_SWS_OP_READ_BIT",
|
||||
[AARCH64_SWS_OP_READ_NIBBLE ] = "AARCH64_SWS_OP_READ_NIBBLE",
|
||||
[AARCH64_SWS_OP_READ_PACKED ] = "AARCH64_SWS_OP_READ_PACKED",
|
||||
@@ -112,7 +111,6 @@ static const char *aarch64_op_type(SwsAArch64OpType op)
|
||||
|
||||
static const char op_type_names[AARCH64_SWS_OP_TYPE_NB][16] = {
|
||||
[AARCH64_SWS_OP_NONE ] = "none",
|
||||
[AARCH64_SWS_OP_PROCESS ] = "process",
|
||||
[AARCH64_SWS_OP_READ_BIT ] = "read_bit",
|
||||
[AARCH64_SWS_OP_READ_NIBBLE ] = "read_nibble",
|
||||
[AARCH64_SWS_OP_READ_PACKED ] = "read_packed",
|
||||
@@ -323,7 +321,6 @@ static const ParamField field_dither_size_log2 = { PARAM_FIELD(dither.size_log2)
|
||||
/* Fields needed to uniquely identify each SwsAArch64OpType. */
|
||||
#define MAX_LEVELS 8
|
||||
static const ParamField *op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS] = {
|
||||
[AARCH64_SWS_OP_PROCESS ] = { &field_op, &field_mask },
|
||||
[AARCH64_SWS_OP_READ_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask },
|
||||
[AARCH64_SWS_OP_READ_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask },
|
||||
[AARCH64_SWS_OP_READ_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask },
|
||||
|
||||
@@ -37,7 +37,6 @@ typedef enum SwsAArch64PixelType {
|
||||
/* Similar to SwsOpType */
|
||||
typedef enum SwsAArch64OpType {
|
||||
AARCH64_SWS_OP_NONE = 0,
|
||||
AARCH64_SWS_OP_PROCESS,
|
||||
AARCH64_SWS_OP_READ_BIT,
|
||||
AARCH64_SWS_OP_READ_NIBBLE,
|
||||
AARCH64_SWS_OP_READ_PACKED,
|
||||
|
||||
@@ -72,30 +72,6 @@ error:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Collect the parameters for the process function. */
|
||||
static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **root)
|
||||
{
|
||||
const SwsOp *read = ff_sws_op_list_input(ops);
|
||||
const SwsOp *write = ff_sws_op_list_output(ops);
|
||||
const int read_planes = read ? (read->rw.packed ? 1 : read->rw.elems) : 0;
|
||||
const int write_planes = write->rw.packed ? 1 : write->rw.elems;
|
||||
int ret;
|
||||
|
||||
SwsAArch64OpMask mask = 0;
|
||||
for (int i = 0; i < FFMAX(read_planes, write_planes); i++)
|
||||
MASK_SET(mask, i, 1);
|
||||
SwsAArch64OpImplParams params = {
|
||||
.op = AARCH64_SWS_OP_PROCESS,
|
||||
.mask = mask,
|
||||
};
|
||||
|
||||
ret = aarch64_collect_op(&params, root);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops)
|
||||
{
|
||||
struct AVTreeNode **root = (struct AVTreeNode **) opaque;
|
||||
@@ -106,10 +82,6 @@ static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops)
|
||||
/* Use at most two full vregs during the widest precision section */
|
||||
int block_size = (ff_sws_op_list_max_size(ops) == 4) ? 8 : 16;
|
||||
|
||||
ret = aarch64_collect_process(&rest, root);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
for (int i = 0; i < rest.num_ops; i++) {
|
||||
SwsAArch64OpImplParams params = { 0 };
|
||||
ret = convert_to_aarch64_impl(ctx, &rest, i, block_size, &params);
|
||||
|
||||
Reference in New Issue
Block a user