diff --git a/app/data/bash-completion/scrcpy b/app/data/bash-completion/scrcpy
index 3aa991b8..ae516c34 100644
--- a/app/data/bash-completion/scrcpy
+++ b/app/data/bash-completion/scrcpy
@@ -7,6 +7,7 @@ _scrcpy() {
         --audio-codec=
         --audio-codec-options=
         --audio-encoder=
+        --audio-output-buffer=
         -b --video-bit-rate=
         --crop=
         -d --select-usb
@@ -121,6 +122,7 @@ _scrcpy() {
         |-b|--video-bit-rate \
         |--audio-codec-options \
         |--audio-encoder \
+        |--audio-output-buffer \
         |--crop \
         |--display \
         |--display-buffer \
diff --git a/app/data/zsh-completion/_scrcpy b/app/data/zsh-completion/_scrcpy
index e6a3bc2a..97bf4f3e 100644
--- a/app/data/zsh-completion/_scrcpy
+++ b/app/data/zsh-completion/_scrcpy
@@ -14,6 +14,7 @@ arguments=(
     '--audio-codec=[Select the audio codec]:codec:(opus aac raw)'
     '--audio-codec-options=[Set a list of comma-separated key\:type=value options for the device audio encoder]'
     '--audio-encoder=[Use a specific MediaCodec audio encoder]'
+    '--audio-output-buffer=[Configure the size of the SDL audio output buffer (in milliseconds)]'
     {-b,--video-bit-rate=}'[Encode the video at the given bit-rate]'
     '--crop=[\[width\:height\:x\:y\] Crop the device screen on the server]'
     {-d,--select-usb}'[Use USB device]'
diff --git a/app/scrcpy.1 b/app/scrcpy.1
index 65357686..97a15d1d 100644
--- a/app/scrcpy.1
+++ b/app/scrcpy.1
@@ -33,6 +33,14 @@ Lower values decrease the latency, but increase the likelyhood of buffer underru
 
 Default is 50.
 
+.TP
+.BI "\-\-audio\-output\-buffer " ms
+Configure the size of the SDL audio output buffer (in milliseconds).
+
+If you get "robotic" audio playback, you should test with a higher value (10). Do not change this setting otherwise.
+
+Default is 5.
+
 .TP
 .BI "\-\-audio\-codec " name
 Select an audio codec (opus, aac or raw).
diff --git a/app/src/audio_player.c b/app/src/audio_player.c
index bba39acb..a0c52c62 100644
--- a/app/src/audio_player.c
+++ b/app/src/audio_player.c
@@ -59,8 +59,6 @@
 #define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_FLT
 #define SC_SDL_SAMPLE_FMT AUDIO_F32
 
-#define SC_AUDIO_OUTPUT_BUFFER_MS 5
-
 #define TO_BYTES(SAMPLES) sc_audiobuf_to_bytes(&ap->buf, (SAMPLES))
 #define TO_SAMPLES(BYTES) sc_audiobuf_to_samples(&ap->buf, (BYTES))
 
@@ -230,8 +228,8 @@ sc_audio_player_frame_sink_push(struct sc_frame_sink *sink,
 
     if (played) {
         uint32_t max_buffered_samples = ap->target_buffering
-                + 12 * SC_AUDIO_OUTPUT_BUFFER_MS * ap->sample_rate / 1000
-                + ap->target_buffering / 10;
+                                      + 12 * ap->output_buffer
+                                      + ap->target_buffering / 10;
         if (buffered_samples > max_buffered_samples) {
             uint32_t skip_samples = buffered_samples - max_buffered_samples;
             sc_audiobuf_skip(&ap->buf, skip_samples);
@@ -246,7 +244,7 @@ sc_audio_player_frame_sink_push(struct sc_frame_sink *sink,
         // max_initial_buffering samples, this would cause unnecessary delay
         // (and glitches to compensate) on start.
         uint32_t max_initial_buffering = ap->target_buffering
-                + 2 * SC_AUDIO_OUTPUT_BUFFER_MS * ap->sample_rate / 1000;
+                                       + 2 * ap->output_buffer;
         if (buffered_samples > max_initial_buffering) {
             uint32_t skip_samples = buffered_samples - max_initial_buffering;
             sc_audiobuf_skip(&ap->buf, skip_samples);
@@ -333,11 +331,28 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
     unsigned nb_channels = tmp;
 #endif
 
+    assert(ctx->sample_rate > 0);
+    assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
+    int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
+    assert(out_bytes_per_sample > 0);
+
+    ap->sample_rate = ctx->sample_rate;
+    ap->nb_channels = nb_channels;
+    ap->out_bytes_per_sample = out_bytes_per_sample;
+
+    ap->target_buffering = ap->target_buffering_delay * ap->sample_rate
+                                                      / SC_TICK_FREQ;
+
+    uint64_t aout_samples = ap->output_buffer_duration * ap->sample_rate
+                                                       / SC_TICK_FREQ;
+    assert(aout_samples <= 0xFFFF);
+    ap->output_buffer = (uint16_t) aout_samples;
+
     SDL_AudioSpec desired = {
         .freq = ctx->sample_rate,
         .format = SC_SDL_SAMPLE_FMT,
         .channels = nb_channels,
-        .samples = SC_AUDIO_OUTPUT_BUFFER_MS * ctx->sample_rate / 1000,
+        .samples = aout_samples,
         .callback = sc_audio_player_sdl_callback,
         .userdata = ap,
     };
@@ -356,11 +371,6 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
     }
     ap->swr_ctx = swr_ctx;
 
-    assert(ctx->sample_rate > 0);
-    assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
-    int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
-    assert(out_bytes_per_sample > 0);
-
 #ifdef SCRCPY_LAVU_HAS_CHLAYOUT
     av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
     av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
@@ -383,13 +393,6 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
         goto error_free_swr_ctx;
     }
 
-    ap->sample_rate = ctx->sample_rate;
-    ap->nb_channels = nb_channels;
-    ap->out_bytes_per_sample = out_bytes_per_sample;
-
-    ap->target_buffering = ap->target_buffering_delay * ap->sample_rate
-                                                      / SC_TICK_FREQ;
-
     // Use a ring-buffer of the target buffering size plus 1 second between the
     // producer and the consumer. It's too big on purpose, to guarantee that
     // the producer and the consumer will be able to access it in parallel
@@ -458,8 +461,10 @@ sc_audio_player_frame_sink_close(struct sc_frame_sink *sink) {
 }
 
 void
-sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering) {
+sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering,
+                     sc_tick output_buffer_duration) {
     ap->target_buffering_delay = target_buffering;
+    ap->output_buffer_duration = output_buffer_duration;
 
     static const struct sc_frame_sink_ops ops = {
         .open = sc_audio_player_frame_sink_open,
diff --git a/app/src/audio_player.h b/app/src/audio_player.h
index 4dd9c4dc..a03e9e35 100644
--- a/app/src/audio_player.h
+++ b/app/src/audio_player.h
@@ -27,6 +27,10 @@ struct sc_audio_player {
     sc_tick target_buffering_delay;
     uint32_t target_buffering; // in samples
 
+    // SDL audio output buffer size, as a duration and in samples.
+    sc_tick output_buffer_duration;
+    uint16_t output_buffer;
+
     // Audio buffer to communicate between the receiver and the SDL audio
     // callback (protected by SDL_AudioDeviceLock())
     struct sc_audiobuf buf;
@@ -80,6 +84,11 @@ struct sc_audio_player_callbacks {
 };
 
+// Initialize the audio player.
+//
+// target_buffering is the target audio buffering delay and
+// output_buffer_duration is the SDL audio output buffer size, both in ticks.
 void
-sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering);
+sc_audio_player_init(struct sc_audio_player *ap, sc_tick target_buffering,
+                     sc_tick output_buffer_duration);
 
 #endif
diff --git a/app/src/cli.c b/app/src/cli.c
index cb101a51..d6d9f41d 100644
--- a/app/src/cli.c
+++ b/app/src/cli.c
@@ -71,6 +71,7 @@ enum {
     OPT_LIST_DISPLAYS,
     OPT_REQUIRE_AUDIO,
     OPT_AUDIO_BUFFER,
+    OPT_AUDIO_OUTPUT_BUFFER,
 };
 
 struct sc_option {
@@ -129,6 +130,16 @@ static const struct sc_option options[] = {
                 "likelyhood of buffer underrun (causing audio glitches).\n"
                 "Default is 50.",
     },
+    {
+        .longopt_id = OPT_AUDIO_OUTPUT_BUFFER,
+        .longopt = "audio-output-buffer",
+        .argdesc = "ms",
+        .text = "Configure the size of the SDL audio output buffer (in "
+                "milliseconds).\n"
+                "If you get \"robotic\" audio playback, you should test with "
+                "a higher value (10). Do not change this setting otherwise.\n"
+                "Default is 5.",
+    },
     {
         .longopt_id = OPT_AUDIO_CODEC,
         .longopt = "audio-codec",
@@ -1204,6 +1215,19 @@ parse_buffering_time(const char *s, sc_tick *tick) {
     return true;
 }
 
+// Parse the --audio-output-buffer value (milliseconds) into *tick (ticks).
+static bool
+parse_audio_output_buffer(const char *s, sc_tick *tick) {
+    long ms;
+    if (!parse_integer_arg(s, &ms, false, 0, 1000,
+                           "audio output buffer")) {
+        return false;
+    }
+
+    *tick = SC_TICK_FROM_MS(ms);
+    return true;
+}
+
 static bool
 parse_lock_video_orientation(const char *s,
                              enum sc_lock_video_orientation *lock_mode) {
@@ -1831,6 +1855,12 @@ parse_args_with_getopt(struct scrcpy_cli_args *args, int argc, char *argv[],
                     return false;
                 }
                 break;
+            case OPT_AUDIO_OUTPUT_BUFFER:
+                if (!parse_audio_output_buffer(optarg,
+                                               &opts->audio_output_buffer)) {
+                    return false;
+                }
+                break;
             default:
                 // getopt prints the error message on stderr
                 return false;
diff --git a/app/src/options.c b/app/src/options.c
index 68c16d53..8b99f6f3 100644
--- a/app/src/options.c
+++ b/app/src/options.c
@@ -44,6 +44,7 @@ const struct scrcpy_options scrcpy_options_default = {
     .display_buffer = 0,
     .v4l2_buffer = 0,
     .audio_buffer = SC_TICK_FROM_MS(50),
+    .audio_output_buffer = SC_TICK_FROM_MS(5),
 #ifdef HAVE_USB
     .otg = false,
 #endif
diff --git a/app/src/options.h b/app/src/options.h
index 06b4ddfa..c41e2757 100644
--- a/app/src/options.h
+++ b/app/src/options.h
@@ -127,6 +127,7 @@ struct scrcpy_options {
     sc_tick display_buffer;
     sc_tick v4l2_buffer;
     sc_tick audio_buffer;
+    sc_tick audio_output_buffer;
 #ifdef HAVE_USB
     bool otg;
 #endif
diff --git a/app/src/scrcpy.c b/app/src/scrcpy.c
index 9e5ec6f0..efa69d31 100644
--- a/app/src/scrcpy.c
+++ b/app/src/scrcpy.c
@@ -688,7 +688,8 @@ aoa_hid_end:
         sc_frame_source_add_sink(src, &s->screen.frame_sink);
 
         if (options->audio) {
-            sc_audio_player_init(&s->audio_player, options->audio_buffer);
+            sc_audio_player_init(&s->audio_player, options->audio_buffer,
+                                 options->audio_output_buffer);
             sc_frame_source_add_sink(&s->audio_decoder.frame_source,
                                      &s->audio_player.frame_sink);
         }
diff --git a/doc/audio.md b/doc/audio.md
index 3755fe37..6e97b103 100644
--- a/doc/audio.md
+++ b/doc/audio.md
@@ -88,3 +88,14 @@ avoid glitches and smooth the playback:
 ```
 scrcpy --display-buffer=200 --audio-buffer=200
 ```
+
+It is also possible to configure another audio buffer (the audio output buffer),
+by default set to 5ms. Don't change it, unless you get some [robotic and glitchy
+sound][#3793]:
+
+```bash
+# Only if absolutely necessary
+scrcpy --audio-output-buffer=10
+```
+
+[#3793]: https://github.com/Genymobile/scrcpy/issues/3793