mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-11 08:13:06 +00:00
avutil/x86/tx_float: add missing vzeroupper to 15xM PFA FFT
The AVX2 15xM PFA FFT calls its second-dimension subtransform with the upper
halves of the YMM registers still dirty. That subtransform may be a legacy-SSE
codelet (fft4 is SSE2 only), causing AVX<->SSE transition penalties. Clear the
upper halves with vzeroupper after the first dimension, before the calls.
Detected with the `sde64 -ast` FATE job.
Fixes: ace42cf581
This commit is contained in:
@@ -1874,6 +1874,8 @@ cglobal fft_pfa_15xM_float, 4, 14, 16, 320, ctx, out, in, stride, len, lut, buf,
|
|||||||
mov lutq, [ctxq + AVTXContext.map] ; load subtransform's map
|
mov lutq, [ctxq + AVTXContext.map] ; load subtransform's map
|
||||||
movsxd lenq, dword [ctxq + AVTXContext.len] ; load subtransform's length
|
movsxd lenq, dword [ctxq + AVTXContext.len] ; load subtransform's length
|
||||||
|
|
||||||
|
vzeroupper
|
||||||
|
|
||||||
.dim2:
|
.dim2:
|
||||||
call tgt5q ; call the FFT
|
call tgt5q ; call the FFT
|
||||||
lea inq, [inq + lenq*8]
|
lea inq, [inq + lenq*8]
|
||||||
|
|||||||
Reference in New Issue
Block a user