mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-11 08:13:06 +00:00
avutil/x86/tx_float: add missing vzeroupper to 15xM PFA FFT
The AVX2 15xM PFA FFT calls its second-dimension subtransform with dirty
upper YMM halves. That subtransform may be a legacy-SSE codelet (fft4 is
SSE2 only), which causes AVX<->SSE transition penalties. Clear the upper
halves with vzeroupper after the first dimension, before the calls.
Detected by the `sde64 -ast` FATE job.
Fixes: ace42cf581
This commit is contained in:
@@ -1874,6 +1874,8 @@ cglobal fft_pfa_15xM_float, 4, 14, 16, 320, ctx, out, in, stride, len, lut, buf,
|
||||
mov lutq, [ctxq + AVTXContext.map] ; load subtransform's map
|
||||
movsxd lenq, dword [ctxq + AVTXContext.len] ; load subtransform's length
|
||||
|
||||
vzeroupper
|
||||
|
||||
.dim2:
|
||||
call tgt5q ; call the FFT
|
||||
lea inq, [inq + lenq*8]
|
||||
|
||||
Reference in New Issue
Block a user