swscale/aarch64/yuv2rgb_neon: add BE 16bpp output formats

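Add big-endian (BE) counterparts to the little-endian (LE) 16bpp paths from
2e142e52ae; the pack macros add a rev16 to byte-swap each 16-bit pixel before
the store. The nv12/nv21 paths are added but are bench-only (no C reference
to compare against, same as in 2e142e52ae).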

Test Name                              A55-gcc           M1-clang             A76-gcc
-------------------------------------------------------------------------------------
yuv420p_rgb565be_1920_neon    15086.1 ( 3.91x)    5507.0 ( 4.34x)    19229.1 ( 2.02x)
yuv420p_bgr565be_1920_neon    15291.7 ( 3.84x)    5476.9 ( 4.37x)    19229.4 ( 2.02x)
yuv420p_rgb555be_1920_neon    15091.5 ( 3.67x)    5569.0 ( 3.97x)    19229.3 ( 1.90x)
yuv420p_bgr555be_1920_neon    15298.6 ( 3.62x)    5600.6 ( 3.98x)    19228.8 ( 1.90x)
yuv422p_rgb565be_1920_neon    16862.3 ( 4.00x)    6378.8 ( 4.64x)    22110.3 ( 2.07x)
yuv422p_bgr565be_1920_neon    17139.3 ( 3.93x)    6448.1 ( 4.50x)    22104.1 ( 2.07x)
yuv422p_rgb555be_1920_neon    16853.3 ( 3.98x)    6468.8 ( 4.12x)    22106.4 ( 1.98x)
yuv422p_bgr555be_1920_neon    17202.2 ( 3.89x)    6467.0 ( 4.12x)    22110.2 ( 1.98x)
yuva420p_rgb565be_1920_neon   15050.2 ( 3.92x)    5452.5 ( 4.39x)    19229.5 ( 2.02x)
yuva420p_bgr565be_1920_neon   15346.6 ( 3.84x)    5462.4 ( 4.36x)    19228.9 ( 2.02x)
yuva420p_rgb555be_1920_neon   15050.8 ( 3.69x)    5463.3 ( 3.95x)    19228.6 ( 1.90x)
yuva420p_bgr555be_1920_neon   15352.8 ( 3.61x)    5543.6 ( 3.89x)    19228.6 ( 1.90x)

Co-authored-by: Ramiro Polla <ramiro.polla@gmail.com>
Signed-off-by: DROOdotFOO <drew@axol.io>
This commit is contained in:
DROOdotFOO
2026-06-10 17:54:20 +00:00
committed by Ramiro Polla
co-authored by Ramiro Polla
parent 7ab5aebc08
commit cc7c567920
3 changed files with 158 additions and 39 deletions
+24
View File
@@ -95,6 +95,10 @@ DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, rgb565le)
DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgr565le) \ DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgr565le) \
DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, rgb555le) \ DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, rgb555le) \
DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgr555le) \ DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgr555le) \
DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, rgb565be) \
DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgr565be) \
DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, rgb555be) \
DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgr555be) \
DECLARE_FF_YUVX_TO_ALL_RGB16_FUNCS(nv12) DECLARE_FF_YUVX_TO_ALL_RGB16_FUNCS(nv12)
DECLARE_FF_YUVX_TO_ALL_RGB16_FUNCS(nv21) DECLARE_FF_YUVX_TO_ALL_RGB16_FUNCS(nv21)
@@ -161,6 +165,10 @@ static int nv24_to_yuv420p_neon_wrapper(SwsInternal *c, const uint8_t *const src
SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgr565le, BGR565LE, accurate_rnd); \ SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgr565le, BGR565LE, accurate_rnd); \
SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, rgb555le, RGB555LE, accurate_rnd); \ SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, rgb555le, RGB555LE, accurate_rnd); \
SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgr555le, BGR555LE, accurate_rnd); \ SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgr555le, BGR555LE, accurate_rnd); \
SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, rgb565be, RGB565BE, accurate_rnd); \
SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgr565be, BGR565BE, accurate_rnd); \
SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, rgb555be, RGB555BE, accurate_rnd); \
SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgr555be, BGR555BE, accurate_rnd); \
} while (0) } while (0)
static void get_unscaled_swscale_neon(SwsInternal *c) { static void get_unscaled_swscale_neon(SwsInternal *c) {
@@ -186,6 +194,10 @@ static void get_unscaled_swscale_neon(SwsInternal *c) {
SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, bgr565le, BGR565LE, accurate_rnd); SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, bgr565le, BGR565LE, accurate_rnd);
SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, rgb555le, RGB555LE, accurate_rnd); SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, rgb555le, RGB555LE, accurate_rnd);
SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, bgr555le, BGR555LE, accurate_rnd); SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, bgr555le, BGR555LE, accurate_rnd);
SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, rgb565be, RGB565BE, accurate_rnd);
SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, bgr565be, BGR565BE, accurate_rnd);
SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, rgb555be, RGB555BE, accurate_rnd);
SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, bgr555be, BGR555BE, accurate_rnd);
if (c->opts.dst_format == AV_PIX_FMT_YUV420P && if (c->opts.dst_format == AV_PIX_FMT_YUV420P &&
(c->opts.src_format == AV_PIX_FMT_NV24 || c->opts.src_format == AV_PIX_FMT_NV42) && (c->opts.src_format == AV_PIX_FMT_NV24 || c->opts.src_format == AV_PIX_FMT_NV42) &&
@@ -221,6 +233,10 @@ av_cold SwsFunc ff_yuv2rgb_init_aarch64(SwsInternal *c)
case AV_PIX_FMT_BGR565LE: return yuv420p_to_bgr565le_neon_wrapper; case AV_PIX_FMT_BGR565LE: return yuv420p_to_bgr565le_neon_wrapper;
case AV_PIX_FMT_RGB555LE: return yuv420p_to_rgb555le_neon_wrapper; case AV_PIX_FMT_RGB555LE: return yuv420p_to_rgb555le_neon_wrapper;
case AV_PIX_FMT_BGR555LE: return yuv420p_to_bgr555le_neon_wrapper; case AV_PIX_FMT_BGR555LE: return yuv420p_to_bgr555le_neon_wrapper;
case AV_PIX_FMT_RGB565BE: return yuv420p_to_rgb565be_neon_wrapper;
case AV_PIX_FMT_BGR565BE: return yuv420p_to_bgr565be_neon_wrapper;
case AV_PIX_FMT_RGB555BE: return yuv420p_to_rgb555be_neon_wrapper;
case AV_PIX_FMT_BGR555BE: return yuv420p_to_bgr555be_neon_wrapper;
} }
} else if (c->opts.src_format == AV_PIX_FMT_YUVA420P) { } else if (c->opts.src_format == AV_PIX_FMT_YUVA420P) {
switch (c->opts.dst_format) { switch (c->opts.dst_format) {
@@ -238,6 +254,10 @@ av_cold SwsFunc ff_yuv2rgb_init_aarch64(SwsInternal *c)
case AV_PIX_FMT_BGR565LE: return yuv420p_to_bgr565le_neon_wrapper; case AV_PIX_FMT_BGR565LE: return yuv420p_to_bgr565le_neon_wrapper;
case AV_PIX_FMT_RGB555LE: return yuv420p_to_rgb555le_neon_wrapper; case AV_PIX_FMT_RGB555LE: return yuv420p_to_rgb555le_neon_wrapper;
case AV_PIX_FMT_BGR555LE: return yuv420p_to_bgr555le_neon_wrapper; case AV_PIX_FMT_BGR555LE: return yuv420p_to_bgr555le_neon_wrapper;
case AV_PIX_FMT_RGB565BE: return yuv420p_to_rgb565be_neon_wrapper;
case AV_PIX_FMT_BGR565BE: return yuv420p_to_bgr565be_neon_wrapper;
case AV_PIX_FMT_RGB555BE: return yuv420p_to_rgb555be_neon_wrapper;
case AV_PIX_FMT_BGR555BE: return yuv420p_to_bgr555be_neon_wrapper;
} }
} else if (c->opts.src_format == AV_PIX_FMT_YUV422P) { } else if (c->opts.src_format == AV_PIX_FMT_YUV422P) {
switch (c->opts.dst_format) { switch (c->opts.dst_format) {
@@ -252,6 +272,10 @@ av_cold SwsFunc ff_yuv2rgb_init_aarch64(SwsInternal *c)
case AV_PIX_FMT_BGR565LE: return yuv422p_to_bgr565le_neon_wrapper; case AV_PIX_FMT_BGR565LE: return yuv422p_to_bgr565le_neon_wrapper;
case AV_PIX_FMT_RGB555LE: return yuv422p_to_rgb555le_neon_wrapper; case AV_PIX_FMT_RGB555LE: return yuv422p_to_rgb555le_neon_wrapper;
case AV_PIX_FMT_BGR555LE: return yuv422p_to_bgr555le_neon_wrapper; case AV_PIX_FMT_BGR555LE: return yuv422p_to_bgr555le_neon_wrapper;
case AV_PIX_FMT_RGB565BE: return yuv422p_to_rgb565be_neon_wrapper;
case AV_PIX_FMT_BGR565BE: return yuv422p_to_bgr565be_neon_wrapper;
case AV_PIX_FMT_RGB555BE: return yuv422p_to_rgb555be_neon_wrapper;
case AV_PIX_FMT_BGR555BE: return yuv422p_to_bgr555be_neon_wrapper;
} }
} }
return NULL; return NULL;
+100 -17
View File
@@ -301,6 +301,22 @@
dst_load_args_packed 2 dst_load_args_packed 2
.endm .endm
// dst argument loaders for the four big-endian 16bpp packed output formats.
// Each macro only forwards to dst_load_args_packed with an argument of 2
// (presumably the bytes per output pixel -- TODO confirm against the
// definition of dst_load_args_packed, which is outside this hunk).
.macro dst_load_args_rgb565be
dst_load_args_packed 2
.endm
.macro dst_load_args_bgr565be
dst_load_args_packed 2
.endm
.macro dst_load_args_rgb555be
dst_load_args_packed 2
.endm
.macro dst_load_args_bgr555be
dst_load_args_packed 2
.endm
.macro dst_load_args_gbrp .macro dst_load_args_gbrp
dst_load_args_planar dst_load_args_planar
.endm .endm
@@ -365,6 +381,22 @@
dst_load_args_packed_2l 2 dst_load_args_packed_2l 2
.endm .endm
// _2l dst argument loaders for the four big-endian 16bpp packed output
// formats. Each macro only forwards to dst_load_args_packed_2l with an
// argument of 2 (presumably the bytes per output pixel -- TODO confirm
// against the definition of dst_load_args_packed_2l, which is outside this
// hunk).
.macro dst_load_args_rgb565be_2l
dst_load_args_packed_2l 2
.endm
.macro dst_load_args_bgr565be_2l
dst_load_args_packed_2l 2
.endm
.macro dst_load_args_rgb555be_2l
dst_load_args_packed_2l 2
.endm
.macro dst_load_args_bgr555be_2l
dst_load_args_packed_2l 2
.endm
// 2-lines-at-a-time planar dst loader. \sp_off is the byte offset at // 2-lines-at-a-time planar dst loader. \sp_off is the byte offset at
// which the caller's [sp+0] arg now lives (i.e., however many bytes the // which the caller's [sp+0] arg now lives (i.e., however many bytes the
// caller pushed before invoking this macro). declare_2l_gbrp spills // caller pushed before invoking this macro). declare_2l_gbrp spills
@@ -639,11 +671,11 @@
.endif .endif
compute_rgb v4, v5, v6, v16, v17, v18 compute_rgb v4, v5, v6, v16, v17, v18
.if r_first .if r_first
// rgb*le: (R << hshift) | (G << 5) | B // rgb*: (R << hshift) | (G << 5) | B
pack_rgb16_2l v8, v6, v5, v4, gshift, hshift pack_rgb16_2l v8, v6, v5, v4, gshift, hshift
pack_rgb16_2l v9, v18, v17, v16, gshift, hshift pack_rgb16_2l v9, v18, v17, v16, gshift, hshift
.else .else
// bgr*le: (B << hshift) | (G << 5) | R // bgr*: (B << hshift) | (G << 5) | R
pack_rgb16_2l v8, v4, v5, v6, gshift, hshift pack_rgb16_2l v8, v4, v5, v6, gshift, hshift
pack_rgb16_2l v9, v16, v17, v18, gshift, hshift pack_rgb16_2l v9, v16, v17, v18, gshift, hshift
.endif .endif
@@ -651,15 +683,16 @@
.endif .endif
.endm .endm
// Map ofmt to .set predicates: rgb16=1 for the four 16bpp LE ofmts // Map ofmt to .set predicates: rgb16=1 for the eight 16bpp ofmts
// (r_first=1 for rgb*, 0 for bgr*; gshift/hshift = 2/11 for 565, // (r_first=1 for rgb*, 0 for bgr*; gshift/hshift = 2/11 for 565,
// 3/10 for 555), letting sibling macros branch on .if rgb16 instead of // 3/10 for 555; is_be=1 for the BE variants), letting sibling macros
// repeating a four-way .ifc cascade. // branch on .if rgb16 / .if is_be instead of repeating .ifc cascades.
.macro set_rgb16_predicates ofmt .macro set_rgb16_predicates ofmt
.set rgb16, 0 .set rgb16, 0
.set r_first, 0 .set r_first, 0
.set gshift, 0 .set gshift, 0
.set hshift, 0 .set hshift, 0
.set is_be, 0
.ifc \ofmt,rgb565le .ifc \ofmt,rgb565le
.set rgb16, 1 .set rgb16, 1
.set r_first, 1 .set r_first, 1
@@ -682,6 +715,32 @@
.set gshift, 3 .set gshift, 3
.set hshift, 10 .set hshift, 10
.endif .endif
.ifc \ofmt,rgb565be
.set rgb16, 1
.set r_first, 1
.set gshift, 2
.set hshift, 11
.set is_be, 1
.endif
.ifc \ofmt,bgr565be
.set rgb16, 1
.set gshift, 2
.set hshift, 11
.set is_be, 1
.endif
.ifc \ofmt,rgb555be
.set rgb16, 1
.set r_first, 1
.set gshift, 3
.set hshift, 10
.set is_be, 1
.endif
.ifc \ofmt,bgr555be
.set rgb16, 1
.set gshift, 3
.set hshift, 10
.set is_be, 1
.endif
.endm .endm
// 16bpp packing uses v8/v9 as the accumulator. AAPCS-64 requires d8/d9 // 16bpp packing uses v8/v9 as the accumulator. AAPCS-64 requires d8/d9
@@ -704,10 +763,10 @@
// Pack 8 pixels of 16bpp output. The three channels are extracted via ushr, // Pack 8 pixels of 16bpp output. The three channels are extracted via ushr,
// widened to u16, then merged via shift-left-insert: // widened to u16, then merged via shift-left-insert:
// dst = (high << high_shl) | (mid << 5) | low // dst = (high << high_shl) | (mid << 5) | low
// For RGB565LE pass (B, G, R) as (low, mid, high), g_shr=2, high_shl=11. // For RGB565LE/BE pass (B, G, R) as (low, mid, high), g_shr=2, high_shl=11.
// For BGR565LE pass (R, G, B), g_shr=2, high_shl=11. // For BGR565LE/BE pass (R, G, B), g_shr=2, high_shl=11.
// For RGB555LE pass (B, G, R), g_shr=3, high_shl=10. // For RGB555LE/BE pass (B, G, R), g_shr=3, high_shl=10.
// For BGR555LE pass (R, G, B), g_shr=3, high_shl=10. // For BGR555LE/BE pass (R, G, B), g_shr=3, high_shl=10.
// Clobbers v20-v23. // Clobbers v20-v23.
.macro pack_rgb16 dst, low_ch, mid_ch, high_ch, g_shr, high_shl .macro pack_rgb16 dst, low_ch, mid_ch, high_ch, g_shr, high_shl
ushr v20.8b, \high_ch\().8b, #3 ushr v20.8b, \high_ch\().8b, #3
@@ -718,6 +777,9 @@
sli \dst\().8h, v23.8h, #5 sli \dst\().8h, v23.8h, #5
uxtl v23.8h, v20.8b uxtl v23.8h, v20.8b
sli \dst\().8h, v23.8h, #\high_shl sli \dst\().8h, v23.8h, #\high_shl
.if is_be
rev16 \dst\().16b, \dst\().16b
.endif
.endm .endm
// As pack_rgb16 but uses v26-v29 as scratch (luma temps, dead after // As pack_rgb16 but uses v26-v29 as scratch (luma temps, dead after
@@ -733,6 +795,9 @@
sli \dst\().8h, v29.8h, #5 sli \dst\().8h, v29.8h, #5
uxtl v29.8h, v26.8b uxtl v29.8h, v26.8b
sli \dst\().8h, v29.8h, #\high_shl sli \dst\().8h, v29.8h, #\high_shl
.if is_be
rev16 \dst\().16b, \dst\().16b
.endif
.endm .endm
.macro declare_func ifmt ofmt .macro declare_func ifmt ofmt
@@ -827,11 +892,11 @@ function ff_\ifmt\()_to_\ofmt\()_neon, export=1
.if rgb16 .if rgb16
compute_rgb v4,v5,v6, v16,v17,v18 compute_rgb v4,v5,v6, v16,v17,v18
.if r_first .if r_first
// rgb*le: (R << hshift) | (G << 5) | B // rgb*: (R << hshift) | (G << 5) | B
pack_rgb16 v8, v6, v5, v4, gshift, hshift pack_rgb16 v8, v6, v5, v4, gshift, hshift
pack_rgb16 v9, v18, v17, v16, gshift, hshift pack_rgb16 v9, v18, v17, v16, gshift, hshift
.else .else
// bgr*le: (B << hshift) | (G << 5) | R // bgr*: (B << hshift) | (G << 5) | R
pack_rgb16 v8, v4, v5, v6, gshift, hshift pack_rgb16 v8, v4, v5, v6, gshift, hshift
pack_rgb16 v9, v16, v17, v18, gshift, hshift pack_rgb16 v9, v16, v17, v18, gshift, hshift
.endif .endif
@@ -970,25 +1035,43 @@ declare_rgb_funcs_2l_packed yuv420p
declare_2l_gbrp yuv420p declare_2l_gbrp yuv420p
declare_rgb_funcs yuv422p declare_rgb_funcs yuv422p
.macro declare_rgb16_funcs ifmt .macro declare_rgb16le_funcs ifmt
declare_func \ifmt, rgb565le declare_func \ifmt, rgb565le
declare_func \ifmt, bgr565le declare_func \ifmt, bgr565le
declare_func \ifmt, rgb555le declare_func \ifmt, rgb555le
declare_func \ifmt, bgr555le declare_func \ifmt, bgr555le
.endm .endm
.macro declare_rgb16_funcs_2l ifmt .macro declare_rgb16le_funcs_2l ifmt
declare_2l_packed \ifmt, rgb565le declare_2l_packed \ifmt, rgb565le
declare_2l_packed \ifmt, bgr565le declare_2l_packed \ifmt, bgr565le
declare_2l_packed \ifmt, rgb555le declare_2l_packed \ifmt, rgb555le
declare_2l_packed \ifmt, bgr555le declare_2l_packed \ifmt, bgr555le
.endm .endm
// Instantiate declare_func for input format \ifmt once per big-endian 16bpp
// output format: rgb565be, bgr565be, rgb555be, bgr555be.
.macro declare_rgb16be_funcs ifmt
declare_func \ifmt, rgb565be
declare_func \ifmt, bgr565be
declare_func \ifmt, rgb555be
declare_func \ifmt, bgr555be
.endm
// Instantiate declare_2l_packed for input format \ifmt once per big-endian
// 16bpp output format: rgb565be, bgr565be, rgb555be, bgr555be.
.macro declare_rgb16be_funcs_2l ifmt
declare_2l_packed \ifmt, rgb565be
declare_2l_packed \ifmt, bgr565be
declare_2l_packed \ifmt, rgb555be
declare_2l_packed \ifmt, bgr555be
.endm
// Subsampled inputs take the 2-line rgb16 path; yuv422p stays single-row. // Subsampled inputs take the 2-line rgb16 path; yuv422p stays single-row.
declare_rgb16_funcs_2l nv12 declare_rgb16le_funcs_2l nv12
declare_rgb16_funcs_2l nv21 declare_rgb16be_funcs_2l nv12
declare_rgb16_funcs_2l yuv420p declare_rgb16le_funcs_2l nv21
declare_rgb16_funcs yuv422p declare_rgb16be_funcs_2l nv21
declare_rgb16le_funcs_2l yuv420p
declare_rgb16be_funcs_2l yuv420p
declare_rgb16le_funcs yuv422p
declare_rgb16be_funcs yuv422p
.macro declare_yuva_funcs ifmt .macro declare_yuva_funcs ifmt
declare_func \ifmt, argb declare_func \ifmt, argb
+34 -22
View File
@@ -46,10 +46,14 @@ static const int dst_fmts[] = {
AV_PIX_FMT_BGRA, AV_PIX_FMT_BGRA,
AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB24,
AV_PIX_FMT_BGR24, AV_PIX_FMT_BGR24,
AV_PIX_FMT_RGB565, AV_PIX_FMT_RGB565LE,
AV_PIX_FMT_BGR565, AV_PIX_FMT_BGR565LE,
AV_PIX_FMT_RGB555, AV_PIX_FMT_RGB555LE,
AV_PIX_FMT_BGR555, AV_PIX_FMT_BGR555LE,
AV_PIX_FMT_RGB565BE,
AV_PIX_FMT_BGR565BE,
AV_PIX_FMT_RGB555BE,
AV_PIX_FMT_BGR555BE,
// AV_PIX_FMT_RGB444, // AV_PIX_FMT_RGB444,
// AV_PIX_FMT_BGR444, // AV_PIX_FMT_BGR444,
// AV_PIX_FMT_RGB8, // AV_PIX_FMT_RGB8,
@@ -71,31 +75,31 @@ static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int a
return 0; return 0;
} }
static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy) static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy, int is_be)
{ {
const uint16_t *ref16 = (const uint16_t *) ref;
const uint16_t *test16 = (const uint16_t *) test;
for (size_t i = 0; i < n; i++) { for (size_t i = 0; i < n; i++) {
if (abs(( ref16[i] & 0x1f) - ( test16[i] & 0x1f)) > accuracy) uint16_t r = is_be ? AV_RB16(ref + i * 2) : AV_RL16(ref + i * 2);
uint16_t t = is_be ? AV_RB16(test + i * 2) : AV_RL16(test + i * 2);
if (abs(( r & 0x1f) - ( t & 0x1f)) > accuracy)
return 1; return 1;
if (abs(((ref16[i] >> 5) & 0x1f) - ((test16[i] >> 5) & 0x1f)) > accuracy) if (abs(((r >> 5) & 0x1f) - ((t >> 5) & 0x1f)) > accuracy)
return 1; return 1;
if (abs(((ref16[i] >> 10) & 0x1f) - ((test16[i] >> 10) & 0x1f)) > accuracy) if (abs(((r >> 10) & 0x1f) - ((t >> 10) & 0x1f)) > accuracy)
return 1; return 1;
} }
return 0; return 0;
} }
static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy) static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy, int is_be)
{ {
const uint16_t *ref16 = (const uint16_t *) ref;
const uint16_t *test16 = (const uint16_t *) test;
for (size_t i = 0; i < n; i++) { for (size_t i = 0; i < n; i++) {
if (abs(( ref16[i] & 0x1f) - ( test16[i] & 0x1f)) > accuracy) uint16_t r = is_be ? AV_RB16(ref + i * 2) : AV_RL16(ref + i * 2);
uint16_t t = is_be ? AV_RB16(test + i * 2) : AV_RL16(test + i * 2);
if (abs(( r & 0x1f) - ( t & 0x1f)) > accuracy)
return 1; return 1;
if (abs(((ref16[i] >> 5) & 0x3f) - ((test16[i] >> 5) & 0x3f)) > accuracy) if (abs(((r >> 5) & 0x3f) - ((t >> 5) & 0x3f)) > accuracy)
return 1; return 1;
if (abs(((ref16[i] >> 11) & 0x1f) - ((test16[i] >> 11) & 0x1f)) > accuracy) if (abs(((r >> 11) & 0x1f) - ((t >> 11) & 0x1f)) > accuracy)
return 1; return 1;
} }
return 0; return 0;
@@ -199,19 +203,27 @@ static void check_yuv2rgb(int src_pix_fmt)
dst1_0 + row * dstStride[0], dst1_0 + row * dstStride[0],
width * sample_size, 3)) width * sample_size, 3))
fail(); fail();
} else if (dst_pix_fmt == AV_PIX_FMT_RGB565 || } else if (dst_pix_fmt == AV_PIX_FMT_RGB565LE ||
dst_pix_fmt == AV_PIX_FMT_BGR565) { dst_pix_fmt == AV_PIX_FMT_BGR565LE ||
dst_pix_fmt == AV_PIX_FMT_RGB565BE ||
dst_pix_fmt == AV_PIX_FMT_BGR565BE) {
int is_be = dst_pix_fmt == AV_PIX_FMT_RGB565BE ||
dst_pix_fmt == AV_PIX_FMT_BGR565BE;
for (int row = 0; row < srcSliceH; row++) for (int row = 0; row < srcSliceH; row++)
if (cmp_565_by_n(dst0_0 + row * dstStride[0], if (cmp_565_by_n(dst0_0 + row * dstStride[0],
dst1_0 + row * dstStride[0], dst1_0 + row * dstStride[0],
width, 2)) width, 2, is_be))
fail(); fail();
} else if (dst_pix_fmt == AV_PIX_FMT_RGB555 || } else if (dst_pix_fmt == AV_PIX_FMT_RGB555LE ||
dst_pix_fmt == AV_PIX_FMT_BGR555) { dst_pix_fmt == AV_PIX_FMT_BGR555LE ||
dst_pix_fmt == AV_PIX_FMT_RGB555BE ||
dst_pix_fmt == AV_PIX_FMT_BGR555BE) {
int is_be = dst_pix_fmt == AV_PIX_FMT_RGB555BE ||
dst_pix_fmt == AV_PIX_FMT_BGR555BE;
for (int row = 0; row < srcSliceH; row++) for (int row = 0; row < srcSliceH; row++)
if (cmp_555_by_n(dst0_0 + row * dstStride[0], if (cmp_555_by_n(dst0_0 + row * dstStride[0],
dst1_0 + row * dstStride[0], dst1_0 + row * dstStride[0],
width, 2)) width, 2, is_be))
fail(); fail();
} else if (dst_pix_fmt == AV_PIX_FMT_GBRP) { } else if (dst_pix_fmt == AV_PIX_FMT_GBRP) {
for (int p = 0; p < 3; p++) for (int p = 0; p < 3; p++)