diff --git a/libswscale/uops.c b/libswscale/uops.c index 9fe4470d2f..3f2b81c205 100644 --- a/libswscale/uops.c +++ b/libswscale/uops.c @@ -144,6 +144,11 @@ void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX]) const SwsUOpParams *par = &op->par; switch (op->uop) { + case SWS_UOP_READ_PLANAR_FH: + case SWS_UOP_READ_PLANAR_FV: + case SWS_UOP_READ_PLANAR_FV_FMA: + av_bprintf(&bp, "_%s", ff_sws_pixel_type_name(par->filter.type)); + break; case SWS_UOP_LSHIFT: case SWS_UOP_RSHIFT: av_bprintf(&bp, "_%u", par->shift.amount); @@ -219,6 +224,11 @@ static int generate_entry_struct(void *opaque, void *key) const SwsUOpParams *par = &uop->par; switch (uop->uop) { + case SWS_UOP_READ_PLANAR_FH: + case SWS_UOP_READ_PLANAR_FV: + case SWS_UOP_READ_PLANAR_FV_FMA: + av_bprintf(bp, ", .par.filter.type = %s", pixel_types[par->filter.type].full); + break; case SWS_UOP_LSHIFT: case SWS_UOP_RSHIFT: av_bprintf(bp, ", .par.shift.amount = %u", par->shift.amount); @@ -270,6 +280,11 @@ static int generate_entry_args(void *opaque, void *key) const SwsUOpParams *par = &uop->par; switch (uop->uop) { + case SWS_UOP_READ_PLANAR_FH: + case SWS_UOP_READ_PLANAR_FV: + case SWS_UOP_READ_PLANAR_FV_FMA: + av_bprintf(bp, ", %s", pixel_types[par->filter.type].full); + break; case SWS_UOP_LSHIFT: case SWS_UOP_RSHIFT: av_bprintf(bp, ", %u", par->shift.amount); @@ -440,6 +455,7 @@ static int translate_rw_op(SwsContext *ctx, SwsUOpList *ops, SwsUOpFlags flags, if (op->rw.filter) { if (op->op == SWS_OP_WRITE || op->rw.frac || op->rw.packed) return AVERROR(ENOTSUP); + uop.par.filter.type = SWS_PIXEL_F32; uop.data.kernel = av_refstruct_ref(op->rw.kernel); if (op->rw.filter == SWS_OP_FILTER_H) { uop.uop = SWS_UOP_READ_PLANAR_FH; diff --git a/libswscale/uops.h b/libswscale/uops.h index dcfbf336b5..d69c35053d 100644 --- a/libswscale/uops.h +++ b/libswscale/uops.h @@ -135,6 +135,10 @@ typedef enum SwsUOpType { SWS_UOP_TYPE_NB, } SwsUOpType; +typedef struct SwsFilterUOp { + SwsPixelType type; /* pixel type to store result as */ +} SwsFilterUOp; + typedef struct SwsShiftUOp { uint8_t amount; } SwsShiftUOp; @@ -172,6 +176,7 @@ typedef struct SwsDitherUOp { int ff_sws_dither_height(const SwsDitherUOp *dither); typedef union SwsUOpParams { + SwsFilterUOp filter; /* for SWS_UOP_READ_*_FV/FH */ SwsShiftUOp shift; SwsSwizzleUOp swizzle; SwsPackUOp pack; diff --git a/libswscale/uops_macros.h b/libswscale/uops_macros.h index f8cd63c828..9ab1858577 100644 --- a/libswscale/uops_macros.h +++ b/libswscale/uops_macros.h @@ -30,35 +30,35 @@ MACRO(__VA_ARGS__, u8_read_planar_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR , .mask = 0x7) \ MACRO(__VA_ARGS__, u8_read_planar_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR , .mask = 0xf) #define SWS_FOR_U8_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_x , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xyzw , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fh_x_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xy_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xyz_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xyzw_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U8_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fh_x_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xy_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xyz_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xyzw_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U8_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_x , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xyzw , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fv_x_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xy_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xyz_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xyzw_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U8_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fv_x_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xy_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xyz_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xyzw_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U8_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U8_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U8_READ_PACKED(MACRO, ...) \ MACRO(__VA_ARGS__, u8_read_packed_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PACKED , 0x3) \ MACRO(__VA_ARGS__, u8_read_packed_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PACKED , 0x7) \ @@ -306,35 +306,35 @@ MACRO(__VA_ARGS__, u16_read_planar_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR , .mask = 0x7) \ MACRO(__VA_ARGS__, u16_read_planar_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR , .mask = 0xf) #define SWS_FOR_U16_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_x , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xy , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xyzw , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fh_x_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xy_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xyz_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xyzw_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U16_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_x , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fh_x_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xy_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xyz_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xyzw_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U16_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_x , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xy , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xyzw , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fv_x_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xy_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xyz_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xyzw_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U16_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_x , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fv_x_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xy_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xyz_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xyzw_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U16_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U16_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U16_READ_PACKED(MACRO, ...) \ MACRO(__VA_ARGS__, u16_read_packed_xy , SWS_PIXEL_U16, SWS_UOP_READ_PACKED , 0x3) \ MACRO(__VA_ARGS__, u16_read_packed_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PACKED , 0x7) \ @@ -790,35 +790,35 @@ #define SWS_FOR_F32_READ_PLANAR(MACRO, ...) #define SWS_FOR_STRUCT_F32_READ_PLANAR(MACRO, ...) #define SWS_FOR_F32_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_x , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xy , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xyz , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xyzw , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fh_x_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xy_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xyz_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xyzw_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_F32_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xy , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xyz , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xyzw , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fh_x_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xy_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xyz_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xyzw_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_F32_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_x , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xy , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xyz , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xyzw , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fv_x_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xy_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xyz_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xyzw_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_F32_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xy , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xyz , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xyzw , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fv_x_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xy_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xyz_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xyzw_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_F32_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_F32_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_F32_READ_PACKED(MACRO, ...) #define SWS_FOR_STRUCT_F32_READ_PACKED(MACRO, ...) #define SWS_FOR_F32_READ_NIBBLE(MACRO, ...) diff --git a/libswscale/uops_tmpl.c b/libswscale/uops_tmpl.c index 9e0d35ea1f..44e8551083 100644 --- a/libswscale/uops_tmpl.c +++ b/libswscale/uops_tmpl.c @@ -219,6 +219,9 @@ SWS_FOR_STRUCT(PX, WRITE_BIT, DECL_ENTRY) DECL_SETUP(setup_filter_v, params, out) { + if (params->uop->par.filter.type != SWS_PIXEL_F32) + return AVERROR(ENOTSUP); + const SwsFilterWeights *filter = params->uop->data.kernel; static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]), ">8 byte pointers not supported"); @@ -238,8 +241,9 @@ DECL_SETUP(setup_filter_v, params, out) } /* Fully general vertical planar filter case */ -DECL_READ(read_planar_fv, const SwsCompMask mask) +DECL_READ(read_planar_fv, const SwsCompMask mask, const SwsPixelType type) { + av_assert2(type == SWS_PIXEL_F32); const SwsOpExec *exec = iter->exec; const float *restrict weights = impl->priv.ptr; const int filter_size = impl->priv.i32[2]; @@ -278,6 +282,9 @@ DECL_READ(read_planar_fv, const SwsCompMask mask) DECL_SETUP(setup_filter_h, params, out) { + if (params->uop->par.filter.type != SWS_PIXEL_F32) + return AVERROR(ENOTSUP); + SwsFilterWeights *filter = params->uop->data.kernel; out->priv.ptr = av_refstruct_ref(filter->weights); out->priv.i32[2] = filter->filter_size; @@ -286,8 +293,9 @@ DECL_SETUP(setup_filter_h, params, out) } /* Fully general horizontal planar filter case */ -DECL_READ(read_planar_fh, const SwsCompMask mask) +DECL_READ(read_planar_fh, const SwsCompMask mask, const SwsPixelType type) { + av_assert2(type == SWS_PIXEL_F32); const SwsOpExec *exec = iter->exec; const int *restrict weights = impl->priv.ptr; const int filter_size = impl->priv.i32[2]; diff --git a/tests/ref/fate/sws-ops-list b/tests/ref/fate/sws-ops-list index 44b615a0c8..68a1fc1105 100644 --- a/tests/ref/fate/sws-ops-list +++ b/tests/ref/fate/sws-ops-list @@ -1 +1 @@ -900da2b2c6276da01ae3f158d02abf0b +e2f26cb6df5c11015e613016bb1a004a