Add audio recording support

Make the recorder accept two input sources (video and audio), and mux them into a single file. PR #3757 <https://github.com/Genymobile/scrcpy/pull/3757>
2023-02-18 18:02:43 +01:00 · 2023-02-18 18:02:43 +01:00 · 7de0622214
commit 7de0622214
parent 3d29f6ef06
3 changed files with 334 additions and 81 deletions
--- a/app/src/recorder.c
+++ b/app/src/recorder.c
@ -8,9 +8,11 @@
 #include "util/log.h"
 #include "util/str.h"
-/** Downcast video packet_sink to recorder */
+/** Downcast packet sinks to recorder */
 #define DOWNCAST_VIDEO(SINK) \
    container_of(SINK, struct sc_recorder, video_packet_sink)
 #define DOWNCAST_AUDIO(SINK) \
    container_of(SINK, struct sc_recorder, audio_packet_sink)
 static const AVRational SCRCPY_TIME_BASE = {1, 1000000}; // timestamps in us
@ -79,9 +81,7 @@ sc_recorder_get_format_name(enum sc_record_format format) {
 }
 static bool
-sc_recorder_write_header(struct sc_recorder *recorder, const AVPacket *packet) {
+sc_recorder_set_extradata(AVStream *ostream, const AVPacket *packet) {
    AVStream *ostream = recorder->ctx->streams[0];
    uint8_t *extradata = av_malloc(packet->size * sizeof(uint8_t));
    if (!extradata) {
        LOG_OOM();
@ -93,20 +93,32 @@ sc_recorder_write_header(struct sc_recorder *recorder, const AVPacket *packet) {
    ostream->codecpar->extradata = extradata;
    ostream->codecpar->extradata_size = packet->size;
-
+    return true;
    return avformat_write_header(recorder->ctx, NULL) >= 0;
 }
-static void
+static inline void
-sc_recorder_rescale_packet(struct sc_recorder *recorder, AVPacket *packet) {
+sc_recorder_rescale_packet(AVStream *stream, AVPacket *packet) {
-    AVStream *ostream = recorder->ctx->streams[0];
+    av_packet_rescale_ts(packet, SCRCPY_TIME_BASE, stream->time_base);
    av_packet_rescale_ts(packet, SCRCPY_TIME_BASE, ostream->time_base);
 }
 static bool
-sc_recorder_write(struct sc_recorder *recorder, AVPacket *packet) {
+sc_recorder_write_stream(struct sc_recorder *recorder, int stream_index,
-    sc_recorder_rescale_packet(recorder, packet);
+                         AVPacket *packet) {
-    return av_write_frame(recorder->ctx, packet) >= 0;
+    AVStream *stream = recorder->ctx->streams[stream_index];
    sc_recorder_rescale_packet(stream, packet);
    return av_interleaved_write_frame(recorder->ctx, packet) >= 0;
 }
 static inline bool
 sc_recorder_write_video(struct sc_recorder *recorder, AVPacket *packet) {
    return sc_recorder_write_stream(recorder, recorder->video_stream_index,
                                    packet);
 }
 static inline bool
 sc_recorder_write_audio(struct sc_recorder *recorder, AVPacket *packet) {
    return sc_recorder_write_stream(recorder, recorder->audio_stream_index,
                                    packet);
 }
 static bool
@ -162,134 +174,270 @@ sc_recorder_wait_video_stream(struct sc_recorder *recorder) {
    sc_mutex_unlock(&recorder->mutex);
    if (codec) {
-        AVStream *ostream = avformat_new_stream(recorder->ctx, codec);
+        AVStream *stream = avformat_new_stream(recorder->ctx, codec);
-        if (!ostream) {
+        if (!stream) {
            return false;
        }
-        ostream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+        stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
-        ostream->codecpar->codec_id = codec->id;
+        stream->codecpar->codec_id = codec->id;
-        ostream->codecpar->format = AV_PIX_FMT_YUV420P;
+        stream->codecpar->format = AV_PIX_FMT_YUV420P;
-        ostream->codecpar->width = recorder->declared_frame_size.width;
+        stream->codecpar->width = recorder->declared_frame_size.width;
-        ostream->codecpar->height = recorder->declared_frame_size.height;
+        stream->codecpar->height = recorder->declared_frame_size.height;
        recorder->video_stream_index = stream->index;
    }
    return true;
 }
 static bool
 sc_recorder_wait_audio_stream(struct sc_recorder *recorder) {
    sc_mutex_lock(&recorder->mutex);
    while (!recorder->audio_codec && !recorder->stopped) {
        sc_cond_wait(&recorder->stream_cond, &recorder->mutex);
    }
    const AVCodec *codec = recorder->audio_codec;
    sc_mutex_unlock(&recorder->mutex);
    if (codec) {
        AVStream *stream = avformat_new_stream(recorder->ctx, codec);
        if (!stream) {
            return false;
        }
        stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
        stream->codecpar->codec_id = codec->id;
        stream->codecpar->ch_layout.nb_channels = 2;
        stream->codecpar->sample_rate = 48000;
        recorder->audio_stream_index = stream->index;
    }
    return true;
 }
 static inline bool
 sc_recorder_has_empty_queues(struct sc_recorder *recorder) {
    if (sc_queue_is_empty(&recorder->video_queue)) {
        // The video queue is empty
        return true;
    }
    if (recorder->audio && sc_queue_is_empty(&recorder->audio_queue)) {
        // The audio queue is empty (when audio is enabled)
        return true;
    }
    // No queue is empty
    return false;
 }
 static bool
 sc_recorder_process_header(struct sc_recorder *recorder) {
    sc_mutex_lock(&recorder->mutex);
-    while (!recorder->stopped && sc_queue_is_empty(&recorder->video_queue)) {
+    while (!recorder->stopped && sc_recorder_has_empty_queues(recorder)) {
        sc_cond_wait(&recorder->queue_cond, &recorder->mutex);
    }
-    if (recorder->stopped && sc_queue_is_empty(&recorder->video_queue)) {
+    if (sc_recorder_has_empty_queues(recorder)) {
        assert(recorder->stopped);
        sc_mutex_unlock(&recorder->mutex);
        return false;
    }
-    struct sc_record_packet *rec;
+    struct sc_record_packet *video_pkt;
-    sc_queue_take(&recorder->video_queue, next, &rec);
+    sc_queue_take(&recorder->video_queue, next, &video_pkt);
    struct sc_record_packet *audio_pkt;
    if (recorder->audio) {
        sc_queue_take(&recorder->audio_queue, next, &audio_pkt);
    }
    sc_mutex_unlock(&recorder->mutex);
-    if (rec->packet->pts != AV_NOPTS_VALUE) {
+    int ret = false;
-        LOGE("The first packet is not a config packet");
+
-        sc_record_packet_delete(rec);
+    if (video_pkt->packet->pts != AV_NOPTS_VALUE) {
-        return false;
+        LOGE("The first video packet is not a config packet");
        goto end;
    }
-    bool ok = sc_recorder_write_header(recorder, rec->packet);
+    assert(recorder->video_stream_index >= 0);
-    sc_record_packet_delete(rec);
+    AVStream *video_stream =
        recorder->ctx->streams[recorder->video_stream_index];
    bool ok = sc_recorder_set_extradata(video_stream, video_pkt->packet);
    if (!ok) {
        goto end;
    }
    if (recorder->audio) {
        if (audio_pkt->packet->pts != AV_NOPTS_VALUE) {
            LOGE("The first audio packet is not a config packet");
            goto end;
        }
        assert(recorder->audio_stream_index >= 0);
        AVStream *audio_stream =
            recorder->ctx->streams[recorder->audio_stream_index];
        ok = sc_recorder_set_extradata(audio_stream, audio_pkt->packet);
        if (!ok) {
            goto end;
        }
    }
    ok = avformat_write_header(recorder->ctx, NULL) >= 0;
    if (!ok) {
        LOGE("Failed to write header to %s", recorder->filename);
-        return false;
+        goto end;
    }
-    return true;
+    ret = true;
 end:
    sc_record_packet_delete(video_pkt);
    if (recorder->audio) {
        sc_record_packet_delete(audio_pkt);
    }
    return ret;
 }
 static bool
 sc_recorder_process_packets(struct sc_recorder *recorder) {
    int64_t pts_origin = AV_NOPTS_VALUE;
    // We can write a packet only once we received the next one so that we can
    // set its duration (next_pts - current_pts)
    struct sc_record_packet *previous = NULL;
    bool header_written = sc_recorder_process_header(recorder);
    if (!header_written) {
        return false;
    }
    struct sc_record_packet *video_pkt = NULL;
    struct sc_record_packet *audio_pkt = NULL;
    // We can write a video packet only once we received the next one so that
    // we can set its duration (next_pts - current_pts)
    struct sc_record_packet *video_pkt_previous = NULL;
    bool error = false;
    for (;;) {
        sc_mutex_lock(&recorder->mutex);
-        while (!recorder->stopped
+        while (!recorder->stopped) {
-                && sc_queue_is_empty(&recorder->video_queue)) {
+            if (!video_pkt && !sc_queue_is_empty(&recorder->video_queue)) {
                // A new packet may be assigned to video_pkt and be processed
                break;
            }
            if (recorder->audio && !audio_pkt
                    && !sc_queue_is_empty(&recorder->audio_queue)) {
                // A new packet may be assigned to audio_pkt and be processed
                break;
            }
            sc_cond_wait(&recorder->queue_cond, &recorder->mutex);
        }
-        // if stopped is set, continue to process the remaining events (to
+        // If stopped is set, continue to process the remaining events (to
-        // finish the recording) before actually stopping
+        // finish the recording) before actually stopping.
-        if (recorder->stopped && sc_queue_is_empty(&recorder->video_queue)) {
+        // If there is no audio, then the audio_queue will remain empty forever
        // and audio_pkt will always be NULL.
        assert(recorder->audio
                || (!audio_pkt && sc_queue_is_empty(&recorder->audio_queue)));
        if (!video_pkt && !sc_queue_is_empty(&recorder->video_queue)) {
            sc_queue_take(&recorder->video_queue, next, &video_pkt);
        }
        if (!audio_pkt && !sc_queue_is_empty(&recorder->audio_queue)) {
            sc_queue_take(&recorder->audio_queue, next, &audio_pkt);
        }
        if (recorder->stopped && !video_pkt && !audio_pkt) {
            assert(sc_queue_is_empty(&recorder->video_queue));
            assert(sc_queue_is_empty(&recorder->audio_queue));
            sc_mutex_unlock(&recorder->mutex);
            break;
        }
-        struct sc_record_packet *rec;
+        assert(video_pkt || audio_pkt); // at least one
        sc_queue_take(&recorder->video_queue, next, &rec);
        sc_mutex_unlock(&recorder->mutex);
        if (rec->packet->pts == AV_NOPTS_VALUE) {
        // Ignore further config packets (e.g. on device orientation
        // change). The next non-config packet will have the config packet
        // data prepended.
-            sc_record_packet_delete(rec);
+        if (video_pkt && video_pkt->packet->pts == AV_NOPTS_VALUE) {
-        } else {
+            sc_record_packet_delete(video_pkt);
-            assert(rec->packet->pts != AV_NOPTS_VALUE);
+            video_pkt = NULL;
        }
-            if (!previous) {
+        if (audio_pkt && audio_pkt->packet->pts == AV_NOPTS_VALUE) {
-                // This is the first non-config packet
+            sc_record_packet_delete(audio_pkt);
-                assert(pts_origin == AV_NOPTS_VALUE);
+            audio_pkt= NULL;
-                pts_origin = rec->packet->pts;
+        }
-                rec->packet->pts = 0;
+
-                rec->packet->dts = 0;
+        if (pts_origin == AV_NOPTS_VALUE) {
-                previous = rec;
+            if (!recorder->audio) {
                assert(video_pkt);
                pts_origin = video_pkt->packet->pts;
            } else if (video_pkt && audio_pkt) {
                pts_origin =
                    MIN(video_pkt->packet->pts, audio_pkt->packet->pts);
            } else {
                // We need both video and audio packets to initialize pts_origin
                continue;
            }
        }
            assert(previous);
        assert(pts_origin != AV_NOPTS_VALUE);
-            rec->packet->pts -= pts_origin;
+        if (video_pkt) {
-            rec->packet->dts = rec->packet->pts;
+            video_pkt->packet->pts -= pts_origin;
            video_pkt->packet->dts = video_pkt->packet->pts;
            if (video_pkt_previous) {
                // we now know the duration of the previous packet
-            previous->packet->duration =
+                video_pkt_previous->packet->duration =
-                rec->packet->pts - previous->packet->pts;
+                    video_pkt->packet->pts - video_pkt_previous->packet->pts;
-            bool ok = sc_recorder_write(recorder, previous->packet);
+                bool ok = sc_recorder_write_video(recorder,
-            sc_record_packet_delete(previous);
+                                                  video_pkt_previous->packet);
                sc_record_packet_delete(video_pkt_previous);
                if (!ok) {
-                LOGE("Could not record packet");
+                    LOGE("Could not record video packet");
-                return false;
+                    error = true;
-            }
+                    goto end;
            previous = rec;
                }
            }
-    // Write the last packet
+            video_pkt_previous = video_pkt;
-    struct sc_record_packet *last = previous;
+            video_pkt = NULL;
        }
        if (audio_pkt) {
            audio_pkt->packet->pts -= pts_origin;
            audio_pkt->packet->dts = audio_pkt->packet->pts;
            bool ok = sc_recorder_write_audio(recorder, audio_pkt->packet);
            if (!ok) {
                LOGE("Could not record audio packet");
                error = true;
                goto end;
            }
            sc_record_packet_delete(audio_pkt);
            audio_pkt = NULL;
        }
    }
    // Write the last video packet
    struct sc_record_packet *last = video_pkt_previous;
    if (last) {
        // assign an arbitrary duration to the last packet
        last->packet->duration = 100000;
-        bool ok = sc_recorder_write(recorder, last->packet);
+        bool ok = sc_recorder_write_video(recorder, last->packet);
        if (!ok) {
            // failing to write the last frame is not very serious, no
            // future frame may depend on it, so the resulting file
@ -302,10 +450,18 @@ sc_recorder_process_packets(struct sc_recorder *recorder) {
    int ret = av_write_trailer(recorder->ctx);
    if (ret < 0) {
        LOGE("Failed to write trailer to %s", recorder->filename);
-        return false;
+        error = false;
    }
-    return true;
+end:
    if (video_pkt) {
        sc_record_packet_delete(video_pkt);
    }
    if (audio_pkt) {
        sc_record_packet_delete(audio_pkt);
    }
    return !error;
 }
 static bool
@ -321,6 +477,14 @@ sc_recorder_record(struct sc_recorder *recorder) {
        return false;
    }
    if (recorder->audio) {
        ok = sc_recorder_wait_audio_stream(recorder);
        if (!ok) {
            sc_recorder_close_output_file(recorder);
            return false;
        }
    }
    // If recorder->stopped, process any queued packet anyway
    ok = sc_recorder_process_packets(recorder);
@ -339,6 +503,7 @@ run_recorder(void *data) {
    recorder->stopped = true;
    // Discard pending packets
    sc_recorder_queue_clear(&recorder->video_queue);
    sc_recorder_queue_clear(&recorder->audio_queue);
    sc_mutex_unlock(&recorder->mutex);
    if (success) {
@ -406,6 +571,8 @@ sc_recorder_video_packet_sink_push(struct sc_packet_sink *sink,
        return false;
    }
    rec->packet->stream_index = recorder->video_stream_index;
    sc_queue_push(&recorder->video_queue, next, rec);
    sc_cond_signal(&recorder->queue_cond);
@ -413,9 +580,66 @@ sc_recorder_video_packet_sink_push(struct sc_packet_sink *sink,
    return true;
 }
 static bool
 sc_recorder_audio_packet_sink_open(struct sc_packet_sink *sink,
                                   const AVCodec *codec) {
    struct sc_recorder *recorder = DOWNCAST_AUDIO(sink);
    assert(recorder->audio);
    assert(codec);
    sc_mutex_lock(&recorder->mutex);
    recorder->audio_codec = codec;
    sc_cond_signal(&recorder->stream_cond);
    sc_mutex_unlock(&recorder->mutex);
    return true;
 }
 static void
 sc_recorder_audio_packet_sink_close(struct sc_packet_sink *sink) {
    struct sc_recorder *recorder = DOWNCAST_AUDIO(sink);
    assert(recorder->audio);
    sc_mutex_lock(&recorder->mutex);
    // EOS also stops the recorder
    recorder->stopped = true;
    sc_cond_signal(&recorder->queue_cond);
    sc_mutex_unlock(&recorder->mutex);
 }
 static bool
 sc_recorder_audio_packet_sink_push(struct sc_packet_sink *sink,
                                   const AVPacket *packet) {
    struct sc_recorder *recorder = DOWNCAST_AUDIO(sink);
    assert(recorder->audio);
    sc_mutex_lock(&recorder->mutex);
    if (recorder->stopped) {
        // reject any new packet
        sc_mutex_unlock(&recorder->mutex);
        return false;
    }
    struct sc_record_packet *rec = sc_record_packet_new(packet);
    if (!rec) {
        LOG_OOM();
        sc_mutex_unlock(&recorder->mutex);
        return false;
    }
    rec->packet->stream_index = recorder->audio_stream_index;
    sc_queue_push(&recorder->audio_queue, next, rec);
    sc_cond_signal(&recorder->queue_cond);
    sc_mutex_unlock(&recorder->mutex);
    return true;
 }
 bool
 sc_recorder_init(struct sc_recorder *recorder, const char *filename,
-                 enum sc_record_format format,
+                 enum sc_record_format format, bool audio,
                 struct sc_size declared_frame_size,
                 const struct sc_recorder_callbacks *cbs, void *cbs_userdata) {
    recorder->filename = strdup(filename);
@ -439,10 +663,17 @@ sc_recorder_init(struct sc_recorder *recorder, const char *filename,
        goto error_queue_cond_destroy;
    }
    recorder->audio = audio;
    sc_queue_init(&recorder->video_queue);
    sc_queue_init(&recorder->audio_queue);
    recorder->stopped = false;
    recorder->video_codec = NULL;
    recorder->audio_codec = NULL;
    recorder->video_stream_index = -1;
    recorder->audio_stream_index = -1;
    recorder->format = format;
    recorder->declared_frame_size = declared_frame_size;
@ -459,6 +690,16 @@ sc_recorder_init(struct sc_recorder *recorder, const char *filename,
    recorder->video_packet_sink.ops = &video_ops;
    if (audio) {
        static const struct sc_packet_sink_ops audio_ops = {
            .open = sc_recorder_audio_packet_sink_open,
            .close = sc_recorder_audio_packet_sink_close,
            .push = sc_recorder_audio_packet_sink_push,
        };
        recorder->audio_packet_sink.ops = &audio_ops;
    }
    return true;
 error_queue_cond_destroy:
--- a/app/src/recorder.h
+++ b/app/src/recorder.h
@ -20,7 +20,10 @@ struct sc_record_packet {
 struct sc_recorder_queue SC_QUEUE(struct sc_record_packet);
 struct sc_recorder {
-    struct sc_packet_sink video_packet_sink; // packet sink trait
+    struct sc_packet_sink video_packet_sink;
    struct sc_packet_sink audio_packet_sink;
    bool audio;
    char *filename;
    enum sc_record_format format;
@ -33,10 +36,15 @@ struct sc_recorder {
    // set on sc_recorder_stop(), packet_sink close or recording failure
    bool stopped;
    struct sc_recorder_queue video_queue;
    struct sc_recorder_queue audio_queue;
-    // wake up the recorder thread once the video codec is known
+    // wake up the recorder thread once the video or audio codec is known
    sc_cond stream_cond;
    const AVCodec *video_codec;
    const AVCodec *audio_codec;
    int video_stream_index;
    int audio_stream_index;
    const struct sc_recorder_callbacks *cbs;
    void *cbs_userdata;
@ -49,7 +57,7 @@ struct sc_recorder_callbacks {
 bool
 sc_recorder_init(struct sc_recorder *recorder, const char *filename,
-                 enum sc_record_format format,
+                 enum sc_record_format format, bool audio,
                 struct sc_size declared_frame_size,
                 const struct sc_recorder_callbacks *cbs, void *cbs_userdata);
--- a/app/src/scrcpy.c
+++ b/app/src/scrcpy.c
@ -424,8 +424,8 @@ scrcpy(struct scrcpy_options *options) {
            .on_ended = sc_recorder_on_ended,
        };
        if (!sc_recorder_init(&s->recorder, options->record_filename,
-                              options->record_format, info->frame_size,
+                              options->record_format, options->audio,
-                              &recorder_cbs, NULL)) {
+                              info->frame_size, &recorder_cbs, NULL)) {
            goto end;
        }
        recorder_initialized = true;
@ -436,6 +436,10 @@ scrcpy(struct scrcpy_options *options) {
        recorder_started = true;
        sc_demuxer_add_sink(&s->video_demuxer, &s->recorder.video_packet_sink);
        if (options->audio) {
            sc_demuxer_add_sink(&s->audio_demuxer,
                                &s->recorder.audio_packet_sink);
        }
    }
    struct sc_controller *controller = NULL;