Add --audio-output-buffer

On some systems, the SDL audio callback is not called frequently enough (for example it requests 5ms of samples every 10ms), because the output buffer is too small. By default, we want to use a small value (5ms) to minimize latency and buffer underrun, but if it does not work well, users need a way to increase it. Refs #3793 <https://github.com/Genymobile/scrcpy/issues/3793>
2023-03-13 09:23:02 +01:00 · 2023-03-13 09:23:02 +01:00 · 39544f34b4
commit 39544f34b4
parent 4755b97908
10 changed files with 86 additions and 21 deletions
--- a/app/data/bash-completion/scrcpy
+++ b/app/data/bash-completion/scrcpy
@ -7,6 +7,7 @@ _scrcpy() {
        --audio-codec=
        --audio-codec-options=
        --audio-encoder=
+        --audio-output-buffer=
        -b --video-bit-rate=
        --crop=
        -d --select-usb
@ -121,6 +122,7 @@ _scrcpy() {
        |-b|--video-bit-rate \
        |--audio-codec-options \
        |--audio-encoder \
+        |--audio-output-buffer \
        |--crop \
        |--display \
        |--display-buffer \
--- a/app/data/zsh-completion/_scrcpy
+++ b/app/data/zsh-completion/_scrcpy
@ -14,6 +14,7 @@ arguments=(
    '--audio-codec=[Select the audio codec]:codec:(opus aac raw)'
    '--audio-codec-options=[Set a list of comma-separated key\:type=value options for the device audio encoder]'
    '--audio-encoder=[Use a specific MediaCodec audio encoder]'
+    '--audio-output-buffer=[Configure the size of the SDL audio output buffer (in milliseconds)]'
    {-b,--video-bit-rate=}'[Encode the video at the given bit-rate]'
    '--crop=[\[width\:height\:x\:y\] Crop the device screen on the server]'
    {-d,--select-usb}'[Use USB device]'
--- a/app/scrcpy.1
+++ b/app/scrcpy.1
@ -33,6 +33,14 @@ Lower values decrease the latency, but increase the likelyhood of buffer underru

 Default is 50.

+.TP
+.BI "\-\-audio\-output\-buffer " ms
+Configure the size of the SDL audio output buffer (in milliseconds).
+
+If you get "robotic" audio playback, you should test with a higher value (10). Do not change this setting otherwise.
+
+Default is 5.
+
 .TP
 .BI "\-\-audio\-codec " name
 Select an audio codec (opus, aac or raw).
--- a/app/src/audio_player.c
+++ b/app/src/audio_player.c
@ -59,8 +59,6 @@
 #define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_FLT
 #define SC_SDL_SAMPLE_FMT AUDIO_F32

-#define SC_AUDIO_OUTPUT_BUFFER_MS 5
-
 #define TO_BYTES(SAMPLES) sc_audiobuf_to_bytes(&ap->buf, (SAMPLES))
 #define TO_SAMPLES(BYTES) sc_audiobuf_to_samples(&ap->buf, (BYTES))

@ -230,7 +228,7 @@ sc_audio_player_frame_sink_push(struct sc_frame_sink *sink,

    if (played) {
        uint32_t max_buffered_samples = ap->target_buffering
-                + 12 * SC_AUDIO_OUTPUT_BUFFER_MS * ap->sample_rate / 1000
+                                      + 12 * ap->output_buffer
                                      + ap->target_buffering / 10;
        if (buffered_samples > max_buffered_samples) {
            uint32_t skip_samples = buffered_samples - max_buffered_samples;
@ -246,7 +244,7 @@ sc_audio_player_frame_sink_push(struct sc_frame_sink *sink,
        // max_initial_buffering samples, this would cause unnecessary delay
        // (and glitches to compensate) on start.
        uint32_t max_initial_buffering = ap->target_buffering
-                + 2 * SC_AUDIO_OUTPUT_BUFFER_MS * ap->sample_rate / 1000;
+                                       + 2 * ap->output_buffer;
        if (buffered_samples > max_initial_buffering) {
            uint32_t skip_samples = buffered_samples - max_initial_buffering;
            sc_audiobuf_skip(&ap->buf, skip_samples);
@ -333,11 +331,28 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
    unsigned nb_channels = tmp;
 #endif

+    assert(ctx->sample_rate > 0);
+    assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
+    int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
+    assert(out_bytes_per_sample > 0);
+
+    ap->sample_rate = ctx->sample_rate;
+    ap->nb_channels = nb_channels;
+    ap->out_bytes_per_sample = out_bytes_per_sample;
+
+    ap->target_buffering = ap->target_buffering_delay * ap->sample_rate
+                                                      / SC_TICK_FREQ;
+
+    uint64_t aout_samples = ap->output_buffer_duration * ap->sample_rate
+                                                       / SC_TICK_FREQ;
+    assert(aout_samples <= 0xFFFF);
+    ap->output_buffer = (uint16_t) aout_samples;
+
    SDL_AudioSpec desired = {
        .freq = ctx->sample_rate,
        .format = SC_SDL_SAMPLE_FMT,
        .channels = nb_channels,
-        .samples = SC_AUDIO_OUTPUT_BUFFER_MS * ctx->sample_rate / 1000,
+        .samples = aout_samples,
        .callback = sc_audio_player_sdl_callback,
        .userdata = ap,
    };
@ -356,11 +371,6 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
    }
    ap->swr_ctx = swr_ctx;

-    assert(ctx->sample_rate > 0);
-    assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
-    int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
-    assert(out_bytes_per_sample > 0);
-
 #ifdef SCRCPY_LAVU_HAS_CHLAYOUT
    av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
    av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
@ -383,13 +393,6 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
        goto error_free_swr_ctx;
    }

-    ap->sample_rate = ctx->sample_rate;
-    ap->nb_channels = nb_channels;
-    ap->out_bytes_per_sample = out_bytes_per_sample;
-
-    ap->target_buffering = ap->target_buffering_delay * ap->sample_rate
-                                                      / SC_TICK_FREQ;
-
    // Use a ring-buffer of the target buffering size plus 1 second between the
    // producer and the consumer. It's too big on purpose, to guarantee that
    // the producer and the consumer will be able to access it in parallel
@ -458,8 +461,10 @@ sc_audio_player_frame_sink_close(struct sc_frame_sink *sink) {
 }

 void
-sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering) {
+sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering,
+                     sc_tick output_buffer_duration) {
    ap->target_buffering_delay = target_buffering;
+    ap->output_buffer_duration = output_buffer_duration;

    static const struct sc_frame_sink_ops ops = {
        .open = sc_audio_player_frame_sink_open,
--- a/app/src/audio_player.h
+++ b/app/src/audio_player.h
@ -27,6 +27,10 @@ struct sc_audio_player {
    sc_tick target_buffering_delay;
    uint32_t target_buffering; // in samples

+    // SDL audio output buffer size.
+    sc_tick output_buffer_duration;
+    uint16_t output_buffer;
+
    // Audio buffer to communicate between the receiver and the SDL audio
    // callback (protected by SDL_AudioDeviceLock())
    struct sc_audiobuf buf;
@ -80,6 +84,7 @@ struct sc_audio_player_callbacks {
 };

 void
-sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering);
+sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering,
+                     sc_tick audio_output_buffer);

 #endif
--- a/app/src/cli.c
+++ b/app/src/cli.c
@ -71,6 +71,7 @@ enum {
    OPT_LIST_DISPLAYS,
    OPT_REQUIRE_AUDIO,
    OPT_AUDIO_BUFFER,
+    OPT_AUDIO_OUTPUT_BUFFER,
 };

 struct sc_option {
@ -129,6 +130,16 @@ static const struct sc_option options[] = {
                "likelyhood of buffer underrun (causing audio glitches).\n"
                "Default is 50.",
    },
+    {
+        .longopt_id = OPT_AUDIO_OUTPUT_BUFFER,
+        .longopt = "audio-output-buffer",
+        .argdesc = "ms",
+        .text = "Configure the size of the SDL audio output buffer (in "
+                "milliseconds).\n"
+                "If you get \"robotic\" audio playback, you should test with "
+                "a higher value (10). Do not change this setting otherwise.\n"
+                "Default is 5.",
+    },
    {
        .longopt_id = OPT_AUDIO_CODEC,
        .longopt = "audio-codec",
@ -1204,6 +1215,19 @@ parse_buffering_time(const char *s, sc_tick *tick) {
    return true;
 }

+static bool // Parses the "audio output buffer" argument s (in ms) into *tick
+parse_audio_output_buffer(const char *s, sc_tick *tick) {
+    long value;
+    bool ok = parse_integer_arg(s, &value, false, 0, 1000, // presumably min=0, max=1000 - confirm against parse_integer_arg
+                                "audio output buffer");
+    if (!ok) { // parsing failed: return false without writing to *tick
+        return false;
+    }
+
+    *tick = SC_TICK_FROM_MS(value); // store the value converted via SC_TICK_FROM_MS (ms to sc_tick)
+    return true;
+}
+
 static bool
 parse_lock_video_orientation(const char *s,
                             enum sc_lock_video_orientation *lock_mode) {
@ -1831,6 +1855,12 @@ parse_args_with_getopt(struct scrcpy_cli_args *args, int argc, char *argv[],
                    return false;
                }
                break;
+            case OPT_AUDIO_OUTPUT_BUFFER:
+                if (!parse_audio_output_buffer(optarg,
+                                               &opts->audio_output_buffer)) {
+                    return false;
+                }
+                break;
            default:
                // getopt prints the error message on stderr
                return false;
--- a/app/src/options.c
+++ b/app/src/options.c
@ -44,6 +44,7 @@ const struct scrcpy_options scrcpy_options_default = {
    .display_buffer = 0,
    .v4l2_buffer = 0,
    .audio_buffer = SC_TICK_FROM_MS(50),
+    .audio_output_buffer = SC_TICK_FROM_MS(5),
 #ifdef HAVE_USB
    .otg = false,
 #endif
--- a/app/src/options.h
+++ b/app/src/options.h
@ -127,6 +127,7 @@ struct scrcpy_options {
    sc_tick display_buffer;
    sc_tick v4l2_buffer;
    sc_tick audio_buffer;
+    sc_tick audio_output_buffer;
 #ifdef HAVE_USB
    bool otg;
 #endif
--- a/app/src/scrcpy.c
+++ b/app/src/scrcpy.c
@ -688,7 +688,8 @@ aoa_hid_end:
        sc_frame_source_add_sink(src, &s->screen.frame_sink);

        if (options->audio) {
-            sc_audio_player_init(&s->audio_player, options->audio_buffer);
+            sc_audio_player_init(&s->audio_player, options->audio_buffer,
+                                 options->audio_output_buffer);
            sc_frame_source_add_sink(&s->audio_decoder.frame_source,
                                     &s->audio_player.frame_sink);
        }
--- a/doc/audio.md
+++ b/doc/audio.md
@ -88,3 +88,14 @@ avoid glitches and smooth the playback:
 ```
 scrcpy --display-buffer=200 --audio-buffer=200
 ```
+
+It is also possible to configure another audio buffer (the audio output buffer),
+by default set to 5ms. Don't change it, unless you get some [robotic and glitchy
+sound][#3793]:
+
+```bash
+# Only if absolutely necessary
+scrcpy --audio-output-buffer=10
+```
+
+[#3793]: https://github.com/Genymobile/scrcpy/issues/3793