diff --git a/tools/depends/target/ffmpeg/0001-rpi-Add-hevc-acceleration.patch b/tools/depends/target/ffmpeg/0001-rpi-Add-hevc-acceleration.patch index af887b3e384e5..f4e829cbe0f4a 100644 --- a/tools/depends/target/ffmpeg/0001-rpi-Add-hevc-acceleration.patch +++ b/tools/depends/target/ffmpeg/0001-rpi-Add-hevc-acceleration.patch @@ -46576,7 +46576,7 @@ index 0000000000..a6b5e8a189 +}; + diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 02f23d954b..522009ccfb 100644 +index 02f23d954b..b516aa934a 100644 --- a/libavcodec/v4l2_buffers.c +++ b/libavcodec/v4l2_buffers.c @@ -21,6 +21,7 @@ @@ -46602,7 +46602,7 @@ index 02f23d954b..522009ccfb 100644 static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf) { -@@ -52,10 +54,8 @@ static inline AVCodecContext *logger(V4L2Buffer *buf) +@@ -52,34 +54,44 @@ static inline AVCodecContext *logger(V4L2Buffer *buf) static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf) { V4L2m2mContext *s = buf_to_m2mctx(avbuf); @@ -46610,12 +46610,60 @@ index 02f23d954b..522009ccfb 100644 - if (s->avctx->pkt_timebase.num) - return s->avctx->pkt_timebase; - return s->avctx->time_base; -+ const AVRational tb = s->avctx->pkt_timebase.num ? s->avctx->pkt_timebase : s->avctx->time_base; ++ const AVRational tb = s->avctx->pkt_timebase.num ? ++ s->avctx->pkt_timebase : ++ s->avctx->time_base; + return tb.num && tb.den ? tb : v4l2_timebase; } - static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) -@@ -210,7 +210,79 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) +-static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) ++static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts, int no_rescale) + { +- int64_t v4l2_pts; +- +- if (pts == AV_NOPTS_VALUE) +- pts = 0; +- + /* convert pts to v4l2 timebase */ +- v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); ++ const int64_t v4l2_pts = ++ no_rescale ? pts : ++ pts == AV_NOPTS_VALUE ? 0 : ++ av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); + out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; + out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; + } + +-static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) ++static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf, int no_rescale) + { +- int64_t v4l2_pts; +- + /* convert pts back to encoder timebase */ +- v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + ++ const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + + avbuf->buf.timestamp.tv_usec; + +- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); ++ return ++ no_rescale ? v4l2_pts : ++ v4l2_pts == 0 ? AV_NOPTS_VALUE : ++ av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); ++} ++ ++static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) ++{ ++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { ++ out->planes[plane].bytesused = bytesused; ++ out->planes[plane].length = length; ++ } else { ++ out->buf.bytesused = bytesused; ++ out->buf.length = length; ++ } + } + + static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) +@@ -210,7 +222,79 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) return AVCOL_TRC_UNSPECIFIED; } @@ -46696,7 +46744,7 @@ index 02f23d954b..522009ccfb 100644 { V4L2Buffer* avbuf = opaque; V4L2m2mContext *s = buf_to_m2mctx(avbuf); -@@ -226,14 +298,52 @@ static void v4l2_free_buffer(void *opaque, uint8_t *unused) +@@ -226,14 +310,52 @@ static void v4l2_free_buffer(void *opaque, uint8_t *unused) /* no need to queue more buffers to the driver */ avbuf->status = V4L2BUF_AVAILABLE; } @@ -46707,7 +46755,7 @@ index 02f23d954b..522009ccfb 100644 ff_v4l2_buffer_enqueue(avbuf); + } + else { -+ av_log(logger(avbuf), AV_LOG_ERROR, "=== %s: Buffer freed but streamoff\n", avbuf->context->name); ++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", avbuf->context->name); + } } @@ -46750,7 +46798,7 @@ index 02f23d954b..522009ccfb 100644 static int v4l2_buf_increase_ref(V4L2Buffer *in) { V4L2m2mContext *s = buf_to_m2mctx(in); -@@ -254,6 +364,24 @@ static int v4l2_buf_increase_ref(V4L2Buffer *in) +@@ -254,6 +376,24 @@ static int v4l2_buf_increase_ref(V4L2Buffer *in) return 0; } @@ -46775,27 +46823,16 @@ index 02f23d954b..522009ccfb 100644 static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) { int ret; -@@ -274,7 +402,18 @@ static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) +@@ -274,7 +414,7 @@ static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) return ret; } -static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset, AVBufferRef* bref) -+static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) -+{ -+ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { -+ out->planes[plane].bytesused = bytesused; -+ out->planes[plane].length = length; -+ } else { -+ out->buf.bytesused = bytesused; -+ out->buf.length = length; -+ } -+} -+ +static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset) { unsigned int bytesused, length; -@@ -286,13 +425,7 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i +@@ -286,13 +426,7 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset)); @@ -46810,7 +46847,7 @@ index 02f23d954b..522009ccfb 100644 return 0; } -@@ -303,13 +436,25 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -303,13 +437,25 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) frame->format = avbuf->context->av_pix_fmt; @@ -46840,7 +46877,7 @@ index 02f23d954b..522009ccfb 100644 } /* fixup special cases */ -@@ -338,68 +483,95 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -338,68 +484,95 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) return 0; } @@ -46888,22 +46925,24 @@ index 02f23d954b..522009ccfb 100644 + } } +} ++ ++static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) ++{ ++ return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); ++} - if (!is_planar_format) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); - int planes_nb = 0; - int offset = 0; -+static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) -+{ -+ return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); -+} -+ +static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +{ + int i; + int num_planes = 0; + int pel_strides[4] = {0}; -+ + +- for (i = 0; i < desc->nb_components; i++) +- planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + + if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) { @@ -46911,17 +46950,15 @@ index 02f23d954b..522009ccfb 100644 + return -1; + } -- for (i = 0; i < desc->nb_components; i++) -- planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); +- for (i = 0; i < planes_nb; i++) { +- int size, h = height; +- if (i == 1 || i == 2) { + for (i = 0; i != desc->nb_components; ++i) { + if (desc->comp[i].plane >= num_planes) + num_planes = desc->comp[i].plane + 1; + pel_strides[desc->comp[i].plane] = desc->comp[i].step; + } - -- for (i = 0; i < planes_nb; i++) { -- int size, h = height; -- if (i == 1 || i == 2) { ++ + if (out->num_planes > 1) { + if (num_planes != out->num_planes) { + av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes); @@ -46987,12 +47024,43 @@ index 02f23d954b..522009ccfb 100644 return 0; } -@@ -475,11 +647,17 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) +@@ -411,12 +584,12 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) + + int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) + { +- v4l2_set_pts(out, frame->pts); ++ v4l2_set_pts(out, frame->pts, 0); + + return v4l2_buffer_swframe_to_buf(frame, out); + } + +-int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) ++int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_rescale_pts) + { + int ret; + +@@ -433,7 +606,7 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) + frame->colorspace = v4l2_get_color_space(avbuf); + frame->color_range = v4l2_get_color_range(avbuf); + frame->color_trc = v4l2_get_color_trc(avbuf); +- frame->pts = v4l2_get_pts(avbuf); ++ frame->pts = v4l2_get_pts(avbuf, no_rescale_pts); + frame->pkt_dts = AV_NOPTS_VALUE; + + /* these values are updated also during re-init in v4l2_process_driver_event */ +@@ -470,20 +643,27 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) + pkt->flags |= AV_PKT_FLAG_CORRUPT; + } + +- pkt->dts = pkt->pts = v4l2_get_pts(avbuf); ++ pkt->dts = pkt->pts = v4l2_get_pts(avbuf, 0); + return 0; } -int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, const void *extdata, size_t extlen) ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, ++ const void *extdata, size_t extlen, int no_rescale_pts) { int ret; @@ -47007,19 +47075,24 @@ index 02f23d954b..522009ccfb 100644 if (ret) return ret; -@@ -491,6 +669,11 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) +- v4l2_set_pts(out, pkt->pts); ++ v4l2_set_pts(out, pkt->pts, no_rescale_pts); + + if (pkt->flags & AV_PKT_FLAG_KEY) + out->flags = V4L2_BUF_FLAG_KEYFRAME; +@@ -491,6 +671,11 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) return 0; } +int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) +{ -+ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0); ++ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); +} + int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) { V4L2Context *ctx = avbuf->context; -@@ -500,6 +683,27 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -500,6 +685,27 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) avbuf->buf.type = ctx->type; avbuf->buf.index = index; @@ -47047,7 +47120,7 @@ index 02f23d954b..522009ccfb 100644 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.length = VIDEO_MAX_PLANES; avbuf->buf.m.planes = avbuf->planes; -@@ -527,14 +731,22 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -527,14 +733,22 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; @@ -47076,7 +47149,7 @@ index 02f23d954b..522009ccfb 100644 } if (avbuf->plane_info[i].mm_addr == MAP_FAILED) -@@ -543,9 +755,6 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -543,9 +757,6 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) avbuf->status = V4L2BUF_AVAILABLE; @@ -47086,7 +47159,7 @@ index 02f23d954b..522009ccfb 100644 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.m.planes = avbuf->planes; avbuf->buf.length = avbuf->num_planes; -@@ -555,6 +764,15 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -555,6 +766,15 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) avbuf->buf.length = avbuf->planes[0].length; } @@ -47102,7 +47175,7 @@ index 02f23d954b..522009ccfb 100644 return ff_v4l2_buffer_enqueue(avbuf); } -@@ -568,6 +786,9 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) +@@ -568,6 +788,9 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) if (ret < 0) return AVERROR(errno); @@ -47113,7 +47186,7 @@ index 02f23d954b..522009ccfb 100644 return 0; diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h -index 8dbc7fc104..7baf618c66 100644 +index 8dbc7fc104..46ca85ce65 100644 --- a/libavcodec/v4l2_buffers.h +++ b/libavcodec/v4l2_buffers.h @@ -27,6 +27,7 @@ @@ -47134,17 +47207,32 @@ index 8dbc7fc104..7baf618c66 100644 /* This object is refcounted per-plane, so we need to keep track * of how many context-refs we are holding. */ AVBufferRef *context_ref; -@@ -98,6 +102,8 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); +@@ -70,11 +74,12 @@ typedef struct V4L2Buffer { + * + * @param[in] frame The AVFRame to push the information to + * @param[in] buf The V4L2Buffer to get the information from ++ * @param[in] no_rescale_pts If non-zero do not rescale PTS + * + * @returns 0 in case of success, AVERROR(EINVAL) if the number of planes is incorrect, + * AVERROR(ENOMEM) if the AVBufferRef can't be created. + */ +-int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf); ++int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf, int no_rescale_pts); + + /** + * Extracts the data from a V4L2Buffer to an AVPacket +@@ -98,6 +103,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); */ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, const void *extdata, size_t extlen); ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, ++ const void *extdata, size_t extlen, int no_rescale_pts); + /** * Extracts the data from an AVFrame to a V4L2Buffer * diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 29b144ed73..e87b5a4432 100644 +index 29b144ed73..582c9b1ffc 100644 --- a/libavcodec/v4l2_context.c +++ b/libavcodec/v4l2_context.c @@ -173,7 +173,8 @@ static int v4l2_handle_event(V4L2Context *ctx) @@ -47360,26 +47448,44 @@ index 29b144ed73..e87b5a4432 100644 return 0; } -@@ -608,7 +698,7 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) +@@ -608,7 +698,8 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) return ff_v4l2_buffer_enqueue(avbuf); } -int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) -+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * extdata, size_t extlen) ++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, ++ const void * extdata, size_t extlen, int no_rescale_pts) { V4L2m2mContext *s = ctx_to_m2mctx(ctx); V4L2Buffer* avbuf; -@@ -626,7 +716,7 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) +@@ -626,14 +717,14 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) if (!avbuf) return AVERROR(EAGAIN); - ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); -+ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen); ++ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, no_rescale_pts); if (ret) return ret; + return ff_v4l2_buffer_enqueue(avbuf); + } + +-int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) ++int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout, int no_rescale_pts) + { + V4L2Buffer *avbuf; + +@@ -650,7 +741,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) + return AVERROR(EAGAIN); + } + +- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); ++ return ff_v4l2_buffer_buf_to_avframe(frame, avbuf, no_rescale_pts); + } + + int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 22a9532444..e459c72c45 100644 +index 22a9532444..3484a25a9c 100644 --- a/libavcodec/v4l2_context.h +++ b/libavcodec/v4l2_context.h @@ -92,6 +92,9 @@ typedef struct V4L2Context { @@ -47392,12 +47498,26 @@ index 22a9532444..e459c72c45 100644 } V4L2Context; /** -@@ -170,7 +173,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); +@@ -156,9 +159,12 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); + * @param[in] ctx The V4L2Context to dequeue from. + * @param[inout] f The AVFrame to dequeue to. + * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) ++ * @param[in] no_rescale_pts (0 rescale pts, 1 use pts as ++ * timestamp directly) ++ * + * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. + */ +-int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); ++int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout, int no_rescale_pts); + + /** + * Enqueues a buffer to a V4L2Context from an AVPacket +@@ -170,7 +176,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); * @param[in] pkt A pointer to an AVPacket. * @return 0 in case of success, a negative error otherwise. */ -int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); -+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size); ++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size, int no_rescale_pts); /** * Enqueues a buffer to a V4L2Context from an AVFrame @@ -47501,7 +47621,7 @@ index 456281f48c..b08a5b38ac 100644 /** diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 3e17e0fcac..c397f2ca2f 100644 +index 3e17e0fcac..b9eb2a6acc 100644 --- a/libavcodec/v4l2_m2m_dec.c +++ b/libavcodec/v4l2_m2m_dec.c @@ -23,6 +23,9 @@ @@ -47548,7 +47668,7 @@ index 3e17e0fcac..c397f2ca2f 100644 + if (ret < 0) + av_log(avctx, AV_LOG_ERROR, "VIDIOC_DECODER_CMD start error: %d\n", errno); + else -+ av_log(avctx, AV_LOG_DEBUG, "VIDIOC_DECODER_CMD start OK\n", errno); ++ av_log(avctx, AV_LOG_DEBUG, "VIDIOC_DECODER_CMD start OK\n"); + + return ret; +} @@ -47591,28 +47711,31 @@ index 3e17e0fcac..c397f2ca2f 100644 /* 3. set the crop parameters */ selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -@@ -133,28 +167,257 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) +@@ -133,54 +167,291 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) return 0; } -+#define XLAT_PTS 1 -+ +-static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) +{ -+ const AVRational t = avctx->pkt_timebase.num ? avctx->pkt_timebase : avctx->time_base; -+ return !t.num || !t.den ? (int64_t)n * 1000000 : ((int64_t)n * t.den) / (t.num); ++ return (int64_t)n; +} + +static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) +{ -+ const AVRational t = avctx->pkt_timebase.num ? avctx->pkt_timebase : avctx->time_base; -+ return (unsigned int)(!t.num || !t.den ? pts / 1000000 : (pts * t.num) / t.den); ++ return (unsigned int)pts; +} + ++// FFmpeg requires us to propagate a number of vars from the coded pkt into ++// the decoded frame. The only thing that tracks like that in V4L2 stateful ++// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no ++// guarantees about PTS being unique or specified for every frame so replace ++// the supplied PTS with a simple incrementing number and keep a circular ++// buffer of all the things we want preserved (including the original PTS) ++// indexed by the tracking no. +static void +xlat_pts_in(AVCodecContext *const avctx, V4L2m2mContext *const s, AVPacket *const avpkt) +{ -+#if XLAT_PTS + int64_t track_pts; + + // Avoid 0 @@ -47633,14 +47756,12 @@ index 3e17e0fcac..c397f2ca2f 100644 + .track_pts = track_pts + }; + avpkt->pts = track_pts; -+#endif +} + +// Returns -1 if we should discard the frame +static int +xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *const frame) +{ -+#if XLAT_PTS + unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; + const V4L2m2mTrackEl *const t = s->track_els + n; + if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) @@ -47679,7 +47800,6 @@ index 3e17e0fcac..c397f2ca2f 100644 + frame->best_effort_timestamp = frame->pts; + frame->pkt_dts = frame->pts; // We can't emulate what s/w does in a useful manner? + av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 ", DTS=%" PRId64 "\n", frame->pts, frame->pkt_dts); -+#endif + return 0; +} + @@ -47687,29 +47807,41 @@ index 3e17e0fcac..c397f2ca2f 100644 + return s->capture.streamon && s->output.streamon; +} + ++#define NQ_OK 0 ++#define NQ_Q_FULL 1 ++#define NQ_SRC_EMPTY 2 ++#define NQ_DEAD 3 + -+// -ve Error -+// 0 OK -+// 1 Dst full (retry if we think V4L2 Q has space now) -+// 2 Src empty (do not retry) -+// 3 Not started (do not retry, do not attempt capture dQ) ++// AVERROR_EOF Flushing an already flushed stream ++// -ve Error (all errors except EOF are unexpected) ++// NQ_OK (0) OK ++// NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now) ++// NQ_SRC_EMPTY Src empty (do not retry) ++// NQ_DEAD Not running (do not retry, do not attempt capture dQ) + +static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s) -+{ -+ AVPacket avpkt = {0}; + { +- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; +- V4L2Context *const capture = &s->capture; +- V4L2Context *const output = &s->output; + AVPacket avpkt = {0}; +- int ret; + int ret = 0; + int ret2 = 0; -+ -+ if (s->buf_pkt.size) { + + if (s->buf_pkt.size) { +- avpkt = s->buf_pkt; +- memset(&s->buf_pkt, 0, sizeof(AVPacket)); + av_packet_move_ref(&avpkt, &s->buf_pkt); -+ } else { -+ ret = ff_decode_get_packet(avctx, &avpkt); + } else { + ret = ff_decode_get_packet(avctx, &avpkt); +- if (ret < 0 && ret != AVERROR_EOF) + if (ret == AVERROR(EAGAIN)) { + if (!stream_started(s)) { + av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__); -+ return 3; ++ return NQ_DEAD; + } -+ return 2; ++ return NQ_SRC_EMPTY; + } + + if (ret == AVERROR_EOF || avpkt.size == 0) { @@ -47726,38 +47858,50 @@ index 3e17e0fcac..c397f2ca2f 100644 + // On the offchance that get_packet left something that needs freeing in here + av_packet_unref(&avpkt); + // Calling enqueue with an empty pkt starts drain -+ ret = ff_v4l2_context_enqueue_packet(&s->output, &avpkt, NULL, 0); ++ ret = ff_v4l2_context_enqueue_packet(&s->output, &avpkt, NULL, 0, 1); + if (ret) { + av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret); + return ret; + } + } -+ return 2; ++ return NQ_SRC_EMPTY; + } + + if (ret < 0) -+ return ret; + return ret; + + xlat_pts_in(avctx, s, &avpkt); -+ } -+ + } + +- if (s->draining) +- goto dequeue; + if ((ret = check_output_streamon(avctx, s)) != 0) + return ret; -+ + +- ret = ff_v4l2_context_enqueue_packet(output, &avpkt); +- if (ret < 0) { +- if (ret != AVERROR(EAGAIN)) +- return ret; + ret = ff_v4l2_context_enqueue_packet(&s->output, &avpkt, -+ avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size); ++ avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size, ++ 1); + s->extdata_sent = 1; -+ + +- s->buf_pkt = avpkt; +- /* no input buffers available, continue dequeing */ + if (ret == AVERROR(EAGAIN)) { + // Out of input buffers - stash + av_packet_move_ref(&s->buf_pkt, &avpkt); -+ ret = 1; -+ } ++ ret = NQ_Q_FULL; + } + else { + // In all other cases we are done with this packet + av_packet_unref(&avpkt); -+ -+ if (ret) { + +- if (avpkt.size) { +- ret = v4l2_try_start(avctx); + if (ret) { +- av_packet_unref(&avpkt); + av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret); + return ret; + } @@ -47767,47 +47911,71 @@ index 3e17e0fcac..c397f2ca2f 100644 + ret2 = v4l2_try_start(avctx); + if (ret2) { + av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2); -+ ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : 3; ++ ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : NQ_DEAD; + } + + return ret; +} + - static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - { -+#if 1 ++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) ++{ + V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; + int src_rv; -+ int dst_rv = 1; ++ int dst_rv = 1; // Non-zero (done), non-negative (error) number + + do { + src_rv = try_enqueue_src(avctx, s); + -+ if (src_rv < 0) { ++ if (src_rv < 0) + av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", src_rv); -+ } + -+ if (s->req_pkt && src_rv == 2 && !s->draining) ++ // If we got a frame last time and we have nothing to enqueue then ++ // return now. rv will be AVERROR(EAGAIN) indicating that we want more input ++ // This should mean that once decode starts we enter a stable state where ++ // we alternately ask for input and produce output ++ if (s->req_pkt && src_rv == NQ_SRC_EMPTY && !s->draining) + break; + -+ if (src_rv == 1 && dst_rv == AVERROR(EAGAIN)) { ++ if (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) { + av_log(avctx, AV_LOG_WARNING, "Poll says src Q has space but enqueue fail"); -+ src_rv = 2; ++ src_rv = NQ_SRC_EMPTY; // If we can't enqueue pretend that there is nothing to enqueue + } -+ -+ if (src_rv >= 0 && src_rv <= 2 && dst_rv != 0) { + +- /* cant recover */ +- if (ret == AVERROR(ENOMEM)) +- return ret; ++ // Try to get a new frame if ++ // (a) we haven't already got one AND ++ // (b) enqueue returned a status indicating that decode is alive ++ if (dst_rv != 0 && ++ (src_rv == NQ_OK || src_rv == NQ_Q_FULL || src_rv == NQ_SRC_EMPTY)) { + do { + // Dequeue frame will unref any previous contents of frame + // so we don't need an explicit unref when discarding -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, -1); -+ -+ if (dst_rv < 0) { -+ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", s->draining, s->capture.done, dst_rv); ++ // This returns AVERROR(EAGAIN) if there isn't a frame ready yet ++ // but there is room in the input Q ++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, -1, 1); ++ ++ if (dst_rv < 0 && dst_rv != AVERROR(EAGAIN)) { ++ if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) ++ av_log(avctx, AV_LOG_DEBUG, ++ "Dequeue EOF: draining=%d, cap.done=%d\n", ++ s->draining, s->capture.done); ++ else ++ av_log(avctx, AV_LOG_ERROR, ++ "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", ++ s->draining, s->capture.done, dst_rv); + } + ++ // Go again if we got a frame that we need to discard + } while (dst_rv == 0 && xlat_pts_out(avctx, s, frame)); + } -+ } while (src_rv == 0 || (src_rv == 1 && dst_rv == AVERROR(EAGAIN)) ); + +- return 0; ++ // Continue trying to enqueue packets if either ++ // (a) we succeeded last time OR ++ // (b) enqueue failed due to input Q full AND there is now room ++ } while (src_rv == NQ_OK || (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) ); + + if (dst_rv) + av_frame_unref(frame); @@ -47815,59 +47983,28 @@ index 3e17e0fcac..c397f2ca2f 100644 + // If we got a frame this time ask for a pkt next time + s->req_pkt = (dst_rv == 0); + ++#if 0 ++ if (dst_rv == 0) ++ { ++ static int z = 0; ++ if (++z > 50) { ++ av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n"); ++ ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); ++ return -1; + } + } ++#endif + +-dequeue: +- if (!s->buf_pkt.size) +- av_packet_unref(&avpkt); +- return ff_v4l2_context_dequeue_frame(capture, frame, -1); + return dst_rv == 0 ? 0 : + src_rv < 0 ? src_rv : + dst_rv < 0 ? dst_rv : + AVERROR(EAGAIN); ++} + -+#else - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const capture = &s->capture; - V4L2Context *const output = &s->output; - AVPacket avpkt = {0}; -- int ret; -+ int ret = 0; - - if (s->buf_pkt.size) { -- avpkt = s->buf_pkt; -- memset(&s->buf_pkt, 0, sizeof(AVPacket)); -+ av_packet_move_ref(&avpkt, &s->buf_pkt); - } else { - ret = ff_decode_get_packet(avctx, &avpkt); -- if (ret < 0 && ret != AVERROR_EOF) -+ if (ret < 0 && ret != AVERROR_EOF && ret != AVERROR(EAGAIN)) - return ret; -+ if (ret == 0) -+ xlat_pts_in(avctx, s, &avpkt); - } - -- if (s->draining) -+ if (ret) - goto dequeue; - -- ret = ff_v4l2_context_enqueue_packet(output, &avpkt); -+// av_log(avctx, AV_LOG_INFO, "Extdata len=%d, sent=%d\n", avctx->extradata_size, s->extdata_sent); -+ ret = ff_v4l2_context_enqueue_packet(output, &avpkt, -+ avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size); -+ s->extdata_sent = 1; - if (ret < 0) { -+ av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret); - if (ret != AVERROR(EAGAIN)) - return ret; - -@@ -178,9 +441,36 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - dequeue: - if (!s->buf_pkt.size) - av_packet_unref(&avpkt); -- return ff_v4l2_context_dequeue_frame(capture, frame, -1); -+ -+ ret = ff_v4l2_context_dequeue_frame(capture, frame, -1); -+ if (!ret) -+ xlat_pts_out(avctx, s, frame); -+ return ret; -+#endif - } - +#if 0 +#include +static int64_t us_time(void) @@ -47875,8 +48012,8 @@ index 3e17e0fcac..c397f2ca2f 100644 + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; -+} -+ + } + +static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +{ + int ret; @@ -47893,7 +48030,7 @@ index 3e17e0fcac..c397f2ca2f 100644 static av_cold int v4l2_decode_init(AVCodecContext *avctx) { V4L2Context *capture, *output; -@@ -188,6 +478,9 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) +@@ -188,6 +459,9 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) V4L2m2mPriv *priv = avctx->priv_data; int ret; @@ -47903,7 +48040,7 @@ index 3e17e0fcac..c397f2ca2f 100644 ret = ff_v4l2_m2m_create_context(priv, &s); if (ret < 0) return ret; -@@ -208,13 +501,32 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) +@@ -208,13 +482,32 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; capture->av_pix_fmt = avctx->pix_fmt; @@ -47939,22 +48076,27 @@ index 3e17e0fcac..c397f2ca2f 100644 return ret; } -@@ -223,10 +535,68 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) +@@ -223,10 +516,59 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) static av_cold int v4l2_decode_close(AVCodecContext *avctx) { ++ int rv; + av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); -+ return ff_v4l2_m2m_codec_end(avctx->priv_data); -+ av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__); ++ rv = ff_v4l2_m2m_codec_end(avctx->priv_data); ++ av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv); ++ return rv; +} + +static void v4l2_decode_flush(AVCodecContext *avctx) +{ ++ // An alternatve and more drastic form of flush is to simply do this: ++ // v4l2_decode_close(avctx); ++ // v4l2_decode_init(avctx); ++ // The downside is that this keeps a decoder open until all the frames ++ // associated with it have been returned. This is a bit wasteful on ++ // possibly limited h/w resources and fails on a Pi for this reason unless ++ // more GPU mem is allocated than is the default. + -+#if 0 -+ v4l2_decode_close(avctx); -+ v4l2_decode_init(avctx); -+#else V4L2m2mPriv *priv = avctx->priv_data; - V4L2m2mContext *s = priv->context; - av_packet_unref(&s->buf_pkt); @@ -47964,7 +48106,10 @@ index 3e17e0fcac..c397f2ca2f 100644 + V4L2Context* capture = &s->capture; + int ret, i; + -+ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); ++ av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); ++ ++ if (!output->streamon) ++ goto done; + + ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); + if (ret < 0) @@ -47975,43 +48120,26 @@ index 3e17e0fcac..c397f2ca2f 100644 + output->buffers[i].status = V4L2BUF_AVAILABLE; + } + ++ // V4L2 makes no guarantees about whether decoded frames are flushed or not ++ // so mark all frames we are tracking to be discarded if they appear + for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) + s->track_els[i].discard = 1; + -+#if 0 -+ -+ ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF); -+ if (ret < 0) -+ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s error: %d\n", capture->name, ret); -+ -+ -+ ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); -+ if (ret < 0) -+ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON %s error: %d\n", capture->name, ret); -+ ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON); -+ if (ret < 0) -+ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON %s error: %d\n", output->name, ret); -+ -+ struct v4l2_decoder_cmd cmd = { -+ .cmd = V4L2_DEC_CMD_START, -+ .flags = 0, -+ }; -+ -+ ret = ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd); -+ if (ret < 0) -+ av_log(avctx, AV_LOG_ERROR, "VIDIOC_DECODER_CMD start error: %d\n", errno); -+#endif -+ -+ s->draining = 0; ++ // resend extradata + s->extdata_sent = 0; ++ // clear EOS status vars ++ s->draining = 0; + output->done = 0; + capture->done = 0; -+#endif ++ ++ // Stream on will occur when we actually submit a new frame ++ ++done: + av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__); } #define OFFSET(x) offsetof(V4L2m2mPriv, x) -@@ -235,10 +605,16 @@ static av_cold int v4l2_decode_close(AVCodecContext *avctx) +@@ -235,10 +577,16 @@ static av_cold int v4l2_decode_close(AVCodecContext *avctx) static const AVOption options[] = { V4L_M2M_DEFAULT_OPTS, { "num_capture_buffers", "Number of buffers in the capture context", @@ -48029,7 +48157,7 @@ index 3e17e0fcac..c397f2ca2f 100644 #define M2MDEC_CLASS(NAME) \ static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ .class_name = #NAME "_v4l2m2m_decoder", \ -@@ -259,9 +635,14 @@ static const AVOption options[] = { +@@ -259,9 +607,14 @@ static const AVOption options[] = { .init = v4l2_decode_init, \ .receive_frame = v4l2_receive_frame, \ .close = v4l2_decode_close, \ @@ -52638,10 +52766,10 @@ index 0000000000..c427b60d30 +}; diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c new file mode 100644 -index 0000000000..85bda396d7 +index 0000000000..d5b2e161d5 --- /dev/null +++ b/libavdevice/egl_vout.c -@@ -0,0 +1,782 @@ +@@ -0,0 +1,805 @@ +/* + * Copyright (c) 2020 John Cox for Raspberry Pi Trading + * @@ -52696,7 +52824,7 @@ index 0000000000..85bda396d7 + +#include "libavutil/rpi_sand_fns.h" + -+#define TRACE_ALL 1 ++#define TRACE_ALL 0 + +struct egl_setup { + int conId; @@ -52804,6 +52932,7 @@ index 0000000000..85bda396d7 + Window win; + EGLContext ctx; + bool fullscreen = false; /* Hook this up to a command line arg */ ++ EGLConfig config; + + if (fullscreen) { + int scrnum = DefaultScreen(dpy); @@ -52813,44 +52942,51 @@ index 0000000000..85bda396d7 + height = DisplayHeight(dpy, scrnum); + } + -+ static const EGLint attribs[] = { -+ EGL_RED_SIZE, 1, -+ EGL_GREEN_SIZE, 1, -+ EGL_BLUE_SIZE, 1, -+ EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, -+ EGL_NONE -+ }; -+ EGLConfig config; -+ EGLint num_configs; -+ if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { -+ av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); -+ return -1; ++ { ++ EGLint num_configs; ++ static const EGLint attribs[] = { ++ EGL_RED_SIZE, 1, ++ EGL_GREEN_SIZE, 1, ++ EGL_BLUE_SIZE, 1, ++ EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, ++ EGL_NONE ++ }; ++ ++ if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { ++ av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); ++ return -1; ++ } + } + -+ EGLint vid; -+ if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { -+ av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); -+ return -1; -+ } ++ { ++ EGLint vid; ++ if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { ++ av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); ++ return -1; ++ } + -+ XVisualInfo visTemplate = { -+ .visualid = vid, -+ }; -+ int num_visuals; -+ XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, -+ &visTemplate, &num_visuals); -+ -+ /* window attributes */ -+ attr.background_pixel = 0; -+ attr.border_pixel = 0; -+ attr.colormap = XCreateColormap( dpy, root, visinfo->visual, AllocNone); -+ attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; -+ /* XXX this is a bad way to get a borderless window! */ -+ mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; -+ -+ win = XCreateWindow( dpy, root, x, y, width, height, -+ 0, visinfo->depth, InputOutput, -+ visinfo->visual, mask, &attr ); ++ { ++ XVisualInfo visTemplate = { ++ .visualid = vid, ++ }; ++ int num_visuals; ++ XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, ++ &visTemplate, &num_visuals); ++ ++ /* window attributes */ ++ attr.background_pixel = 0; ++ attr.border_pixel = 0; ++ attr.colormap = XCreateColormap( dpy, root, visinfo->visual, AllocNone); ++ attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; ++ /* XXX this is a bad way to get a borderless window! */ ++ mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; ++ ++ win = XCreateWindow( dpy, root, x, y, width, height, ++ 0, visinfo->depth, InputOutput, ++ visinfo->visual, mask, &attr ); ++ XFree(visinfo); ++ } ++ } + + if (fullscreen) + no_border(dpy, win); @@ -52870,35 +53006,38 @@ index 0000000000..85bda396d7 + + eglBindAPI(EGL_OPENGL_ES_API); + -+ static const EGLint ctx_attribs[] = { -+ EGL_CONTEXT_CLIENT_VERSION, 2, -+ EGL_NONE -+ }; -+ ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs ); -+ if (!ctx) { -+ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); -+ return -1; ++ { ++ static const EGLint ctx_attribs[] = { ++ EGL_CONTEXT_CLIENT_VERSION, 2, ++ EGL_NONE ++ }; ++ ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs ); ++ if (!ctx) { ++ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); ++ return -1; ++ } + } + -+ XFree(visinfo); + + XMapWindow(dpy, win); + -+ EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, -+ (void *)(uintptr_t)win, NULL); -+ if (!surf) { -+ av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); -+ return -1; -+ } ++ { ++ EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, ++ (void *)(uintptr_t)win, NULL); ++ if (!surf) { ++ av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); ++ return -1; ++ } + -+ if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { -+ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); -+ return -1; -+ } ++ if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { ++ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); ++ return -1; ++ } + -+ *winRet = win; -+ *ctxRet = ctx; -+ *surfRet = surf; ++ *winRet = win; ++ *ctxRet = ctx; ++ *surfRet = surf; ++ } + + return 0; +} @@ -52916,20 +53055,22 @@ index 0000000000..85bda396d7 + glShaderSource(s, 1, (const GLchar **) &source, NULL); + glCompileShader(s); + -+ GLint ok; -+ glGetShaderiv(s, GL_COMPILE_STATUS, &ok); ++ { ++ GLint ok; ++ glGetShaderiv(s, GL_COMPILE_STATUS, &ok); + -+ if (!ok) { -+ GLchar *info; -+ GLint size; ++ if (!ok) { ++ GLchar *info; ++ GLint size; + -+ glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); -+ info = malloc(size); ++ glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); ++ info = malloc(size); + -+ glGetShaderInfoLog(s, size, NULL, info); -+ av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source); ++ glGetShaderInfoLog(s, size, NULL, info); ++ av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source); + -+ return 0; ++ return 0; ++ } + } + + return s; @@ -52948,23 +53089,25 @@ index 0000000000..85bda396d7 + glAttachShader(prog, fs); + glLinkProgram(prog); + -+ GLint ok; -+ glGetProgramiv(prog, GL_LINK_STATUS, &ok); -+ if (!ok) { -+ /* Some drivers return a size of 1 for an empty log. This is the size -+ * of a log that contains only a terminating NUL character. -+ */ -+ GLint size; -+ GLchar *info = NULL; -+ glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); -+ if (size > 1) { -+ info = malloc(size); -+ glGetProgramInfoLog(prog, size, NULL, info); -+ } ++ { ++ GLint ok; ++ glGetProgramiv(prog, GL_LINK_STATUS, &ok); ++ if (!ok) { ++ /* Some drivers return a size of 1 for an empty log. This is the size ++ * of a log that contains only a terminating NUL character. ++ */ ++ GLint size; ++ GLchar *info = NULL; ++ glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); ++ if (size > 1) { ++ info = malloc(size); ++ glGetProgramInfoLog(prog, size, NULL, info); ++ } + -+ av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", -+ (info != NULL) ? info : ""); -+ return 0; ++ av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", ++ (info != NULL) ? info : ""); ++ return 0; ++ } + } + + return prog; @@ -53002,13 +53145,16 @@ index 0000000000..85bda396d7 + + glUseProgram(prog); + -+ static const float verts[] = { -+ -1, -1, -+ 1, -1, -+ 1, 1, -+ -1, 1, -+ }; -+ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); ++ { ++ static const float verts[] = { ++ -1, -1, ++ 1, -1, ++ 1, 1, ++ -1, 1, ++ }; ++ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); ++ } ++ + glEnableVertexAttribArray(0); + return 0; +} @@ -53116,26 +53262,29 @@ index 0000000000..85bda396d7 + + *a = EGL_NONE; + ++#if TRACE_ALL + for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) { + av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]); + } ++#endif ++ { ++ const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, ++ EGL_NO_CONTEXT, ++ EGL_LINUX_DMA_BUF_EXT, ++ NULL, attribs); ++ if (!image) { ++ av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd); ++ return -1; ++ } + -+ EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, -+ EGL_NO_CONTEXT, -+ EGL_LINUX_DMA_BUF_EXT, -+ NULL, attribs); -+ if (!image) { -+ fprintf(stderr, "Failed to import fd %d\n", desc->objects[0].fd); -+ exit(1); -+ } -+ -+ glGenTextures(1, &da->texture); -+ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); -+ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -+ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -+ glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); ++ glGenTextures(1, &da->texture); ++ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); ++ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); ++ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); ++ glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); + -+ eglDestroyImageKHR(de->setup.egl_dpy, image); ++ eglDestroyImageKHR(de->setup.egl_dpy, image); ++ } + + da->fd = desc->objects[0].fd; + @@ -53222,7 +53371,9 @@ index 0000000000..85bda396d7 + goto fail; + } + ++#if TRACE_ALL + av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__); ++#endif + sem_post(&de->display_start_sem); + + for (;;) { @@ -53342,7 +53493,7 @@ index 0000000000..85bda396d7 + egl_display_env_t * const de = s->priv_data; + unsigned int i; + -+ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); + + de->setup = (struct egl_setup){0}; + @@ -53362,7 +53513,7 @@ index 0000000000..85bda396d7 + return -1; + } + -+ av_log(s, AV_LOG_INFO, ">>> %s\n", __func__); ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); + + return 0; +} @@ -53371,7 +53522,7 @@ index 0000000000..85bda396d7 +{ + egl_display_env_t * const de = s->priv_data; + -+ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); + + de->q_terminate = 1; + sem_post(&de->q_sem); @@ -53382,7 +53533,7 @@ index 0000000000..85bda396d7 + av_frame_free(&de->q_next); + av_frame_free(&de->q_this); + -+ av_log(s, AV_LOG_INFO, ">>> %s\n", __func__); ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +} + +#define OFFSET(x) offsetof(egl_display_env_t, x) @@ -54533,6 +54684,316 @@ index 9b08372eb2..b0b5be0fa6 100644 OBJS += $(COMPAT_OBJS:%=../compat/%) +diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile +index 5613813ba8..ab8bcfcf34 100644 +--- a/libavutil/aarch64/Makefile ++++ b/libavutil/aarch64/Makefile +@@ -1,4 +1,6 @@ + OBJS += aarch64/cpu.o \ + aarch64/float_dsp_init.o \ + +-NEON-OBJS += aarch64/float_dsp_neon.o ++NEON-OBJS += aarch64/float_dsp_neon.o \ ++ aarch64/rpi_sand_neon.o \ ++ +diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S +new file mode 100644 +index 0000000000..641242dd8f +--- /dev/null ++++ b/libavutil/aarch64/rpi_sand_neon.S +@@ -0,0 +1,239 @@ ++/* ++Copyright (c) 2021 Michael Eiler ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: Michael Eiler ++*/ ++ ++#include "asm.S" ++ ++// void ff_rpi_sand8_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++ ++function ff_rpi_sand8_lines_to_planar_y8, export=1 ++ // w15 contains the number of rows we need to process ++ ldr w15, [sp, #0] ++ ++ // w8 will contain the number of blocks per row ++ // w8 = floor(_w/stride1) ++ // stride1 is assumed to always be 128 ++ mov w8, w1 ++ lsr w8, w8, #7 ++ ++ // in case the width of the image is not a multiple of 128, there will ++ // be an incomplete block at the end of every row ++ // w9 contains the number of pixels stored within this block ++ // w9 = _w - w8 * 128 ++ lsl w9, w8, #7 ++ sub w9, w7, w9 ++ ++ // this is the value we have to add to the src pointer after reading a complete block ++ // it will move the address to the start of the next block ++ // w10 = stride2 * stride1 - stride1 ++ mov w10, w4 ++ lsl w10, w10, #7 ++ sub w10, w10, #128 ++ ++ // w11 is the row offset, meaning the start offset of the first block of every collumn ++ // this will be increased with stride1 within every iteration of the row_loop ++ eor w11, w11, w11 ++ ++ // w12 = 0, processed row count ++ eor w12, w12, w12 ++row_loop: ++ // start of the first block within the current row ++ // x13 = row offset + src ++ mov x13, x2 ++ add x13, x13, x11 ++ ++ // w14 = 0, processed block count ++ eor w14, w14, w14 ++block_loop: ++ // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128 ++ // fortunately these aren't callee saved ones, meaning we don't need to backup them ++ ld1 { v0.16b, v1.16b, v2.16b, v3.16b}, [x13], #64 ++ ld1 { v4.16b, v5.16b, v6.16b, v7.16b}, [x13], #64 ++ ++ // write these registers back to the destination vector and increase the dst address by 128 ++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 ++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x0], #64 ++ ++ // move the source register to the beginning of the next block (x13 = src + block offset) ++ add x13, x13, x10 ++ // increase the block counter ++ add w14, w14, #1 ++ ++ // continue with the block_loop if we haven't copied all full blocks yet ++ cmp w8, w14 ++ bgt block_loop ++ ++ // handle the last block at the end of each row ++ // at most 127 byte values copied from src to dst ++ eor w5, w5, w5 // i = 0 ++incomplete_block_loop_y8: ++ cmp w5, w9 ++ bge incomplete_block_loop_end_y8 ++ ++ ldrb w6, [x13] ++ strb w6, [x0] ++ add x13, x13, #1 ++ add x0, x0, #1 ++ ++ add w5, w5, #1 ++ b incomplete_block_loop_y8 ++incomplete_block_loop_end_y8: ++ ++ ++ // increase the row offset by 128 (stride1) ++ add w11, w11, #128 ++ // increment the row counter ++ add w12, w12, #1 ++ ++ // process the next row if we haven't finished yet ++ cmp w15, w12 ++ bgt row_loop ++ ++ ret ++endfunc ++ ++ ++ ++// void ff_rpi_sand8_lines_to_planar_c8( ++// uint8_t * dst_u, : x0 ++// unsigned int dst_stride_u, : w1 == width ++// uint8_t * dst_v, : x2 ++// unsigned int dst_stride_v, : w3 == width ++// const uint8_t * src, : x4 ++// unsigned int stride1, : w5 == 128 ++// unsigned int stride2, : w6 ++// unsigned int _x, : w7 ++// unsigned int y, : [sp, #0] ++// unsigned int _w, : [sp, #8] ++// unsigned int h); : [sp, #16] ++ ++function ff_rpi_sand8_lines_to_planar_c8, export=1 ++ // w7 = width ++ ldr w7, [sp, #8] ++ ++ // w15 contains the number of rows we need to process ++ ldr w15, [sp, #16] ++ ++ // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6 ++ mov w8, w7 ++ lsr w8, w8, #6 ++ ++ // number of pixels in block at the end of every row ++ // w9 = _w - (w8 * 64) ++ lsl w9, w8, #6 ++ sub w9, w7, w9 ++ ++ // address delta to the beginning of the next block ++ // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128 ++ lsl w10, w6, #7 ++ sub w10, w10, #128 ++ ++ // w11 = row address start offset = 0 ++ eor w11, w11, w11 ++ ++ // w12 = 0, row counter ++ eor w12, w12, w12 ++row_loop_c8: ++ // start of the first block within the current row ++ // x13 = row offset + src ++ mov x13, x4 ++ add x13, x13, x11 ++ ++ // w14 = 0, processed block count ++ eor w14, w14, w14 ++block_loop_c8: ++ // load the full block -> 128 bytes, the block contains 64 interleaved U and V values ++ ld2 { v0.16b, v1.16b }, [x13], #32 ++ ld2 { v2.16b, v3.16b }, [x13], #32 ++ ld2 { v4.16b, v5.16b }, [x13], #32 ++ ld2 { v6.16b, v7.16b }, [x13], #32 ++ ++ // swap register so that we can write them out with a single instruction ++ mov v16.16b, v1.16b ++ mov v17.16b, v3.16b ++ mov v18.16b, v5.16b ++ mov v1.16b, v2.16b ++ mov v2.16b, v4.16b ++ mov v3.16b, v6.16b ++ mov v4.16b, v16.16b ++ mov v5.16b, v17.16b ++ mov v6.16b, v18.16b ++ ++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 ++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x2], #64 ++ ++ // increment row counter and move src to the beginning of the next block ++ add w14, w14, #1 ++ add x13, x13, x10 ++ ++ // jump to block_loop_c8 iff the block count is smaller than the number of full blocks ++ cmp w8, w14 ++ bgt block_loop_c8 ++ ++ // handle incomplete block at the end of every row ++ eor w5, w5, w5 // point counter, this might be ++incomplete_block_loop_c8: ++ cmp w5, w9 ++ bge incomplete_block_loop_end_c8 ++ ++ ldrb w1, [x13] ++ strb w1, [x0] ++ add x13, x13, #1 ++ ++ ldrb w1, [x13] ++ strb w1, [x2] ++ add x13, x13, #1 ++ ++ add x0, x0, #1 ++ add x2, x2, #1 ++ ++ add w5, w5, #1 ++ b incomplete_block_loop_c8 ++incomplete_block_loop_end_c8: ++ ++ ++ // increase row_offset by stride1 ++ add w11, w11, #128 ++ add w12, w12, #1 ++ ++ // jump to row_Loop_c8 iff the row count is small than the height ++ cmp w15, w12 ++ bgt row_loop_c8 ++ ++ ret ++endfunc ++ ++ +diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h +new file mode 100644 +index 0000000000..2894ce5aa3 +--- /dev/null ++++ b/libavutil/aarch64/rpi_sand_neon.h +@@ -0,0 +1,47 @@ ++/* ++Copyright (c) 2021 Michael Eiler ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: Michael Eiler ++*/ ++ ++#pragma once ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, ++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, ++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ ++void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u, ++ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++#ifdef __cplusplus ++} ++#endif ++ diff --git a/libavutil/arm/Makefile b/libavutil/arm/Makefile index 5da44b0542..b74b7c4e2f 100644 --- a/libavutil/arm/Makefile @@ -55784,7 +56245,7 @@ index 1c625cfc8a..3400390a77 100644 }; diff --git a/libavutil/rpi_sand_fn_pw.h b/libavutil/rpi_sand_fn_pw.h new file mode 100644 -index 0000000000..0d5d203dc3 +index 0000000000..0324f6826d --- /dev/null +++ b/libavutil/rpi_sand_fn_pw.h @@ -0,0 +1,227 @@ @@ -55844,7 +56305,7 @@ index 0000000000..0d5d203dc3 + const unsigned int w = _w; + const unsigned int mask = stride1 - 1; + -+#if PW == 1 && HAVE_SAND_ASM ++#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64) + if (_x == 0) { + ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride, + src, stride1, stride2, _x, y, _w, h); @@ -55896,7 +56357,7 @@ index 0000000000..0d5d203dc3 + const unsigned int w = _w * 2; + const unsigned int mask = stride1 - 1; + -+#if PW == 1 && HAVE_SAND_ASM ++#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64) + if (_x == 0) { + ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v, + src, stride1, stride2, _x, y, _w, h); @@ -56017,10 +56478,10 @@ index 0000000000..0d5d203dc3 + diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c new file mode 100644 -index 0000000000..ed0261b02f +index 0000000000..038c306877 --- /dev/null +++ b/libavutil/rpi_sand_fns.c -@@ -0,0 +1,353 @@ +@@ -0,0 +1,357 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. @@ -56060,6 +56521,10 @@ index 0000000000..ed0261b02f +#if ARCH_ARM && HAVE_NEON +#include "arm/rpi_sand_neon.h" +#define HAVE_SAND_ASM 1 ++#elif ARCH_AARCH64 && HAVE_NEON ++#include "aarch64/rpi_sand_neon.h" ++#define HAVE_SAND_ASM 0 ++#define HAVE_SAND_ASM64 1 +#else +#define HAVE_SAND_ASM 0 +#endif @@ -57337,14 +57802,16 @@ index 0000000000..29fa9fa68d +# -Wa,-ahls diff --git a/pi-util/conf_pi2.sh b/pi-util/conf_pi2.sh new file mode 100755 -index 0000000000..3dd5edcf83 +index 0000000000..92cd9e7cfd --- /dev/null +++ b/pi-util/conf_pi2.sh -@@ -0,0 +1,50 @@ +@@ -0,0 +1,57 @@ +echo "Configure for Pi2/3" + -+RPI_TOOLROOT=`pwd`/../tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf -+RPI_OPT_VC=`pwd`/../firmware/hardfp/opt/vc ++FFSRC=`pwd` ++ ++RPI_TOOLROOT=$FFSRC/../tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf ++RPI_OPT_VC=$FFSRC/../firmware/hardfp/opt/vc + +RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux" +RPI_LIBDIRS="-L$RPI_TOOLROOT/lib -L$RPI_OPT_VC/lib" @@ -57352,19 +57819,24 @@ index 0000000000..3dd5edcf83 +#RPI_KEEPS="-save-temps=obj" +RPI_KEEPS="" + -+USR_PREFIX=`pwd`/install -+LIB_PREFIX=$USR_PREFIX/lib/arm-linux-gnueabihf -+INC_PREFIX=$USR_PREFIX/include/arm-linux-gnueabihf -+ +SHARED_LIBS="--enable-shared" +if [ "$1" == "--noshared" ]; then + SHARED_LIBS="--disable-shared" ++ OUT=out/x-armv7-static-rel + echo Static libs +else + echo Shared libs ++ OUT=out/x-armv7-shared-rel +fi + -+./configure --enable-cross-compile\ ++USR_PREFIX=$FFSRC/$OUT/install ++LIB_PREFIX=$USR_PREFIX/lib/arm-linux-gnueabihf ++INC_PREFIX=$USR_PREFIX/include/arm-linux-gnueabihf ++ ++mkdir -p $FFSRC/$OUT ++cd $FFSRC/$OUT ++ ++$FFSRC/configure --enable-cross-compile\ + --prefix=$USR_PREFIX\ + --libdir=$LIB_PREFIX\ + --incdir=$INC_PREFIX\ @@ -57615,10 +58087,10 @@ index 0000000000..2e59e6ceb5 + diff --git a/pi-util/ffperf.py b/pi-util/ffperf.py new file mode 100755 -index 0000000000..2fabe98c32 +index 0000000000..65c5224cd8 --- /dev/null +++ b/pi-util/ffperf.py -@@ -0,0 +1,127 @@ +@@ -0,0 +1,128 @@ +#!/usr/bin/env python3 + +import time @@ -57657,14 +58129,14 @@ index 0000000000..2fabe98c32 + def __gt__(self, other): + return self.elapsed > other.elapsed + -+ def time_file(name, prefix): ++ def time_file(name, prefix, ffmpeg="./ffmpeg"): + stats = tstats() + stats.name = name + start_time = time.clock_gettime(time.CLOCK_MONOTONIC); -+ cproc = subprocess.Popen(["./ffmpeg", -+ "-hwaccel", "rpi", ++ cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw", ++ "-vcodec", "hevc_rpi", + "-t", "30", "-i", prefix + name, -+ "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog); ++ "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog); + pinfo = os.wait4(cproc.pid, 0) + end_time = time.clock_gettime(time.CLOCK_MONOTONIC); + stats.elapsed = end_time - start_time @@ -57692,6 +58164,7 @@ index 0000000000..2fabe98c32 + argp.add_argument("--csv_in", help="CSV input filename") + argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).") + argp.add_argument("--repeat", default=3, type=int, help="Run repeat count") ++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable") + + args = argp.parse_args() + @@ -57727,7 +58200,7 @@ index 0000000000..2fabe98c32 + + t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999}) + for i in range(args.repeat): -+ t = tstats.time_file(f, prefix) ++ t = tstats.time_file(f, prefix, args.ffmpeg) + print ("...", t.times_str()) + if t0 > t: + t0 = t