S3 presigned URLs are signed for a specific HTTP method (typically GET
for our use cases). The curl block driver currently issues a HEAD
request to discover the backend features and the file size, which fails
with 403.
Add a 'force-range' option that skips the HEAD request and instead
issues a minimal GET request (querying 1 byte from the server) to
extract the file size from the 'Content-Range' response header. To
achieve this the 'curl_header_cb' is redesigned to generically parse
HTTP headers.
$ $QEMU -drive driver=http,\
'url=https://s3.example.com/some.img?X-Amz-Security-Token=XXX',
force-range=true
Enabling the 'force-range' option without the backend supporting it is
undefined behavior and untested but the libcurl should ignore the body
and stop reading after the HTTP headers then we would fail with the
expected `Server does not support 'range' (byte ranges).` error.
Signed-off-by: Antoine Damhet <adamhet@scaleway.com>
---
block/curl.c | 104 ++++++++++++++++++--------
block/trace-events | 1 +
docs/system/device-url-syntax.rst.inc | 6 ++
qapi/block-core.json | 14 +++-
4 files changed, 90 insertions(+), 35 deletions(-)
diff --git a/block/curl.c b/block/curl.c
index 6dccf002564e..66aecfb20ec6 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -62,10 +62,12 @@
#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret"
+#define CURL_BLOCK_OPT_FORCE_RANGE "force-range"
#define CURL_BLOCK_OPT_READAHEAD_DEFAULT (256 * 1024)
#define CURL_BLOCK_OPT_SSLVERIFY_DEFAULT true
#define CURL_BLOCK_OPT_TIMEOUT_DEFAULT 5
+#define CURL_BLOCK_OPT_FORCE_RANGE_DEFAULT false
struct BDRVCURLState;
struct CURLState;
@@ -206,27 +208,33 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
{
BDRVCURLState *s = opaque;
size_t realsize = size * nmemb;
- const char *p = ptr;
- const char *end = p + realsize;
- const char *t = "accept-ranges : bytes "; /* A lowercase template */
+ g_autofree char *header = g_strstrip(g_strndup(ptr, realsize));
+ char *val = strchr(header, ':');
- /* check if header matches the "t" template */
- for (;;) {
- if (*t == ' ') { /* space in t matches any amount of isspace in p */
- if (p < end && g_ascii_isspace(*p)) {
- ++p;
- } else {
- ++t;
- }
- } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
- ++p, ++t;
- } else {
- break;
- }
+ if (!val) {
+ return realsize;
}
- if (!*t && p == end) { /* if we managed to reach ends of both strings */
- s->accept_range = true;
+ *val++ = '\0';
+ g_strchomp(header);
+ while (g_ascii_isspace(*val)) {
+ ++val;
+ }
+
+ trace_curl_header_cb(header, val);
+
+ if (!g_ascii_strcasecmp(header, "accept-ranges")) {
+ if (!g_ascii_strcasecmp(val, "bytes")) {
+ s->accept_range = true;
+ }
+ } else if (!g_ascii_strcasecmp(header, "Content-Range")) {
+ /* Content-Range fmt is `bytes begin-end/full_size` */
+ val = strchr(val, '/');
+ if (val) {
+ if (qemu_strtou64(val + 1, NULL, 10, &s->len) < 0) {
+ s->len = UINT64_MAX;
+ }
+ }
}
return realsize;
@@ -668,6 +676,11 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "ID of secret used as password for HTTP proxy auth",
},
+ {
+ .name = CURL_BLOCK_OPT_FORCE_RANGE,
+ .type = QEMU_OPT_BOOL,
+ .help = "Assume HTTP range requests are supported",
+ },
{ /* end of list */ }
},
};
@@ -690,6 +703,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
#endif
const char *secretid;
const char *protocol_delimiter;
+ bool force_range;
int ret;
bdrv_graph_rdlock_main_loop();
@@ -807,35 +821,56 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
}
s->accept_range = false;
+ s->len = UINT64_MAX;
+ force_range = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_FORCE_RANGE,
+ CURL_BLOCK_OPT_FORCE_RANGE_DEFAULT);
+ /*
+ * Once the minimum required libcurl version is bumped to `7.83`, the header
+ * callback and manual parsing can be replaced by `curl_easy_header` calls.
+ */
if (curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1L) ||
curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION, curl_header_cb) ||
curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s)) {
- pstrcpy(state->errmsg, CURL_ERROR_SIZE,
- "curl library initialization failed.");
- goto out;
+ goto out_init;
+ }
+ if (force_range) {
+ if (curl_easy_setopt(state->curl, CURLOPT_CUSTOMREQUEST, "GET") ||
+ curl_easy_setopt(state->curl, CURLOPT_RANGE, "0-0")) {
+ goto out_init;
+ }
}
+
if (curl_easy_perform(state->curl))
goto out;
- /* CURL 7.55.0 deprecates CURLINFO_CONTENT_LENGTH_DOWNLOAD in favour of
- * the *_T version which returns a more sensible type for content length.
- */
+
+ if (!force_range) {
+ /*
+ * CURL 7.55.0 deprecates CURLINFO_CONTENT_LENGTH_DOWNLOAD in favour of
+ * the *_T version which returns a more sensible type for content
+ * length.
+ */
#if LIBCURL_VERSION_NUM >= 0x073700
- if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl)) {
- goto out;
- }
+ if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
+ &cl)) {
+ goto out;
+ }
#else
- if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl)) {
- goto out;
- }
+ if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
+ &cl)) {
+ goto out;
+ }
#endif
- if (cl < 0) {
+ if (cl >= 0) {
+ s->len = cl;
+ }
+ }
+
+ if (s->len == UINT64_MAX) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"Server didn't report file size.");
goto out;
}
- s->len = cl;
-
if ((!strncasecmp(s->url, "http://", strlen("http://"))
|| !strncasecmp(s->url, "https://", strlen("https://")))
&& !s->accept_range) {
@@ -856,6 +891,9 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
qemu_opts_del(opts);
return 0;
+out_init:
+ pstrcpy(state->errmsg, CURL_ERROR_SIZE,
+ "curl library initialization failed.");
out:
error_setg(errp, "CURL: Error opening file: %s", state->errmsg);
curl_easy_cleanup(state->curl);
diff --git a/block/trace-events b/block/trace-events
index c9b4736ff884..d170fc96f15f 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -191,6 +191,7 @@ ssh_server_status(int status) "server status=%d"
curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
curl_sock_cb(int action, int fd) "sock action %d on fd %d"
curl_read_cb(size_t realsize) "just reading %zu bytes"
+curl_header_cb(const char *key, const char *val) "looking at %s: %s"
curl_open(const char *file) "opening %s"
curl_open_size(uint64_t size) "size = %" PRIu64
curl_setup_preadv(uint64_t bytes, uint64_t start, const char *range) "reading %" PRIu64 " at %" PRIu64 " (%s)"
diff --git a/docs/system/device-url-syntax.rst.inc b/docs/system/device-url-syntax.rst.inc
index aae65d138c00..e77032e9e4b6 100644
--- a/docs/system/device-url-syntax.rst.inc
+++ b/docs/system/device-url-syntax.rst.inc
@@ -179,6 +179,12 @@ These are specified using a special URL syntax.
get the size of the image to be downloaded. If not set, the
default timeout of 5 seconds is used.
+ ``force-range``
+ Assume the HTTP backend supports range requests and avoid doing
+ an HTTP HEAD request to discover the feature. Typically S3
+ presigned URLs will only support one method and refuse other
+ request types.
+
Note that when passing options to qemu explicitly, ``driver`` is the
value of <protocol>.
diff --git a/qapi/block-core.json b/qapi/block-core.json
index b82af7425614..ff018c2d6bfb 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4582,12 +4582,17 @@
# @cookie-secret: ID of a QCryptoSecret object providing the cookie
# data in a secure way. See @cookie for the format. (since 2.10)
#
+# @force-range: Don't issue a HEAD HTTP request to discover if the
+# backend supports range requests and rely only on GET requests.
+# This is especially useful for S3 presigned URLs. (since 11.0)
+#
# Since: 2.9
##
{ 'struct': 'BlockdevOptionsCurlHttp',
'base': 'BlockdevOptionsCurlBase',
'data': { '*cookie': 'str',
- '*cookie-secret': 'str'} }
+ '*cookie-secret': 'str',
+ '*force-range': 'bool'} }
##
# @BlockdevOptionsCurlHttps:
@@ -4605,13 +4610,18 @@
# @cookie-secret: ID of a QCryptoSecret object providing the cookie
# data in a secure way. See @cookie for the format. (since 2.10)
#
+# @force-range: Don't issue a HEAD HTTP request to discover if the
+# backend supports range requests and rely only on GET requests.
+# This is especially useful for S3 presigned URLs. (since 11.0)
+#
# Since: 2.9
##
{ 'struct': 'BlockdevOptionsCurlHttps',
'base': 'BlockdevOptionsCurlBase',
'data': { '*cookie': 'str',
'*sslverify': 'bool',
- '*cookie-secret': 'str'} }
+ '*cookie-secret': 'str',
+ '*force-range': 'bool'} }
##
# @BlockdevOptionsCurlFtp:
--
2.53.0
Antoine Damhet <adamhet@scaleway.com> writes:
> S3 presigned URLs are signed for a specific HTTP method (typically GET
> for our use cases). The curl block driver currently issues a HEAD
> request to discover the backend features and the file size, which fails
> with 403.
>
> Add a 'force-range' option that skips the HEAD request and instead
> issues a minimal GET request (querying 1 byte from the server) to
> extract the file size from the 'Content-Range' response header. To
> achieve this the 'curl_header_cb' is redesigned to generically parse
> HTTP headers.
>
> $ $QEMU -drive driver=http,\
> 'url=https://s3.example.com/some.img?X-Amz-Security-Token=XXX',
> force-range=true
>
> Enabling the 'force-range' option without the backend supporting it is
> undefined behavior and untested
"Undefined behavior" suggests it could do anything, even destroy data.
I hope that's not the case. What is the case?
What is "the backend"? The web server specified with @url?
> but the libcurl should ignore the body
> and stop reading after the HTTP headers then we would fail with the
> expected `Server does not support 'range' (byte ranges).` error.
>
> Signed-off-by: Antoine Damhet <adamhet@scaleway.com>
> ---
[...]
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index b82af7425614..ff018c2d6bfb 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -4582,12 +4582,17 @@
> # @cookie-secret: ID of a QCryptoSecret object providing the cookie
> # data in a secure way. See @cookie for the format. (since 2.10)
> #
> +# @force-range: Don't issue a HEAD HTTP request to discover if the
> +# backend supports range requests and rely only on GET requests.
> +# This is especially useful for S3 presigned URLs. (since 11.0)
Unlike the commit message, this doesn't mention the need for "the
backend" (whatever that may be) supporting it.
> +#
> # Since: 2.9
> ##
> { 'struct': 'BlockdevOptionsCurlHttp',
> 'base': 'BlockdevOptionsCurlBase',
> 'data': { '*cookie': 'str',
> - '*cookie-secret': 'str'} }
> + '*cookie-secret': 'str',
> + '*force-range': 'bool'} }
>
> ##
> # @BlockdevOptionsCurlHttps:
> @@ -4605,13 +4610,18 @@
> # @cookie-secret: ID of a QCryptoSecret object providing the cookie
> # data in a secure way. See @cookie for the format. (since 2.10)
> #
> +# @force-range: Don't issue a HEAD HTTP request to discover if the
> +# backend supports range requests and rely only on GET requests.
> +# This is especially useful for S3 presigned URLs. (since 11.0)
> +#
> # Since: 2.9
> ##
@force-range is duplicated between BlockdevOptionsCurlHttp and
BlockdevOptionsCurlHttps. @cookie and @cookie-secret are already
duplicated before the patch. Time to factor out a common base type?
> { 'struct': 'BlockdevOptionsCurlHttps',
> 'base': 'BlockdevOptionsCurlBase',
> 'data': { '*cookie': 'str',
> '*sslverify': 'bool',
> - '*cookie-secret': 'str'} }
> + '*cookie-secret': 'str',
> + '*force-range': 'bool'} }
>
> ##
> # @BlockdevOptionsCurlFtp:
On Tue, Feb 17, 2026 at 09:53:11AM +0100, Markus Armbruster wrote:
> Antoine Damhet <adamhet@scaleway.com> writes:
>
> > S3 presigned URLs are signed for a specific HTTP method (typically GET
> > for our use cases). The curl block driver currently issues a HEAD
> > request to discover the backend features and the file size, which fails
> > with 403.
> >
> > Add a 'force-range' option that skips the HEAD request and instead
> > issues a minimal GET request (querying 1 byte from the server) to
> > extract the file size from the 'Content-Range' response header. To
> > achieve this the 'curl_header_cb' is redesigned to generically parse
> > HTTP headers.
> >
> > $ $QEMU -drive driver=http,\
> > 'url=https://s3.example.com/some.img?X-Amz-Security-Token=XXX',
> > force-range=true
> >
> > Enabling the 'force-range' option without the backend supporting it is
> > undefined behavior and untested
>
> "Undefined behavior" suggests it could do anything, even destroy data.
> I hope that's not the case. What is the case?
>
> What is "the backend"? The web server specified with @url?
"Undefined behavior" was probably too strong a wording. I have done many
more tests and now have a clearer picture of what happens:
The web server for @url will respond with `HTTP 200` and try to send the
whole file. Since we specified `CURLOPT_NOBODY` to libcurl, it stops
reading the socket after the headers and just shuts it down. The
`force-range` mode is transparent for the user, even if it can waste a
few TCP packets.
I'll rewrite the commit message to reflect the actual behavior of the
option in the v2.
>
> > but the libcurl should ignore the body
> > and stop reading after the HTTP headers then we would fail with the
> > expected `Server does not support 'range' (byte ranges).` error.
> >
> > Signed-off-by: Antoine Damhet <adamhet@scaleway.com>
> > ---
>
> [...]
>
> > diff --git a/qapi/block-core.json b/qapi/block-core.json
> > index b82af7425614..ff018c2d6bfb 100644
> > --- a/qapi/block-core.json
> > +++ b/qapi/block-core.json
> > @@ -4582,12 +4582,17 @@
> > # @cookie-secret: ID of a QCryptoSecret object providing the cookie
> > # data in a secure way. See @cookie for the format. (since 2.10)
> > #
> > +# @force-range: Don't issue a HEAD HTTP request to discover if the
> > +# backend supports range requests and rely only on GET requests.
> > +# This is especially useful for S3 presigned URLs. (since 11.0)
>
> Unlike the commit message, this doesn't mention the need for "the
> backend" (whatever that may be) supporting it.
I will replace "the backend" with "the HTTP server". Should I also
document here what happens when the HTTP server does not support range
requests, or is the current description sufficient?
>
> > +#
> > # Since: 2.9
> > ##
> > { 'struct': 'BlockdevOptionsCurlHttp',
> > 'base': 'BlockdevOptionsCurlBase',
> > 'data': { '*cookie': 'str',
> > - '*cookie-secret': 'str'} }
> > + '*cookie-secret': 'str',
> > + '*force-range': 'bool'} }
> >
> > ##
> > # @BlockdevOptionsCurlHttps:
> > @@ -4605,13 +4610,18 @@
> > # @cookie-secret: ID of a QCryptoSecret object providing the cookie
> > # data in a secure way. See @cookie for the format. (since 2.10)
> > #
> > +# @force-range: Don't issue a HEAD HTTP request to discover if the
> > +# backend supports range requests and rely only on GET requests.
> > +# This is especially useful for S3 presigned URLs. (since 11.0)
> > +#
> > # Since: 2.9
> > ##
>
> @force-range is is duplicated between BlockdevOptionsCurlHttp and
> BlockdevOptionsCurlHttps. @cookie and @cookie-secret is already
> duplicated before the patch. Time to factor out a common base type?
Would this be only on the QAPI side? Something like:
```
{ 'struct': 'BlockdevOptionsCurlHttps',
- 'base': 'BlockdevOptionsCurlBase',
- 'data': { '*cookie': 'str',
- '*sslverify': 'bool',
- '*cookie-secret': 'str',
- '*force-range': 'bool'} }
+ 'base': 'BlockdevOptionsCurlHttp',
+ 'data': { '*sslverify': 'bool' } }
```
Would you rather see this in a separate commit, or is the same patch
OK?
--
Antoine 'xdbob' Damhet
Engineer @scaleway
>
> > { 'struct': 'BlockdevOptionsCurlHttps',
> > 'base': 'BlockdevOptionsCurlBase',
> > 'data': { '*cookie': 'str',
> > '*sslverify': 'bool',
> > - '*cookie-secret': 'str'} }
> > + '*cookie-secret': 'str',
> > + '*force-range': 'bool'} }
> >
> > ##
> > # @BlockdevOptionsCurlFtp:
>
Antoine Damhet <adamhet@scaleway.com> writes:
> On Tue, Feb 17, 2026 at 09:53:11AM +0100, Markus Armbruster wrote:
>> Antoine Damhet <adamhet@scaleway.com> writes:
>>
>> > S3 presigned URLs are signed for a specific HTTP method (typically GET
>> > for our use cases). The curl block driver currently issues a HEAD
>> > request to discover the backend features and the file size, which fails
>> > with 403.
>> >
>> > Add a 'force-range' option that skips the HEAD request and instead
>> > issues a minimal GET request (querying 1 byte from the server) to
>> > extract the file size from the 'Content-Range' response header. To
>> > achieve this the 'curl_header_cb' is redesigned to generically parse
>> > HTTP headers.
>> >
>> > $ $QEMU -drive driver=http,\
>> > 'url=https://s3.example.com/some.img?X-Amz-Security-Token=XXX',
>> > force-range=true
>> >
>> > Enabling the 'force-range' option without the backend supporting it is
>> > undefined behavior and untested
>>
>> "Undefined behavior" suggests it could do anything, even destroy data.
>> I hope that's not the case. What is the case?
>>
>> What is "the backend"? The web server specified with @url?
>
> Undefined behavior was probably too strong of a wording. I have done way
> more tests and have a clearer picture of what happens:
>
> The web server for @url will respond with `HTTP 200` and try to send the
> whole file. Since we specified `CURLOPT_NOBODY` to the libcurl it stops
> reading the socket after the headers and justs shuts it down. The
> `force-range` mode is transparent for the user even if it can wastes a
> few TCP packets.
>
> I'll rewrite the commit message to reflect the actual behavior of the
> option in the v2.
Thanks!
>> > but the libcurl should ignore the body
>> > and stop reading after the HTTP headers then we would fail with the
>> > expected `Server does not support 'range' (byte ranges).` error.
>> >
>> > Signed-off-by: Antoine Damhet <adamhet@scaleway.com>
>> > ---
>>
>> [...]
>>
>> > diff --git a/qapi/block-core.json b/qapi/block-core.json
>> > index b82af7425614..ff018c2d6bfb 100644
>> > --- a/qapi/block-core.json
>> > +++ b/qapi/block-core.json
>> > @@ -4582,12 +4582,17 @@
>> > # @cookie-secret: ID of a QCryptoSecret object providing the cookie
>> > # data in a secure way. See @cookie for the format. (since 2.10)
>> > #
>> > +# @force-range: Don't issue a HEAD HTTP request to discover if the
>> > +# backend supports range requests and rely only on GET requests.
>> > +# This is especially useful for S3 presigned URLs. (since 11.0)
Missing: Defaults to false.
>> Unlike the commit message, this doesn't mention the need for "the
>> backend" (whatever that may be) supporting it.
>
> Will rephrase "the backend" with "the http server". Should I document
> the behavior of the http server missing the range requests here or is
> the current description sufficient ?
What do users need to know here? I think it's when and why to use
@force-range, and the drawbacks of using it, if there are any.
>> > +#
>> > # Since: 2.9
>> > ##
>> > { 'struct': 'BlockdevOptionsCurlHttp',
>> > 'base': 'BlockdevOptionsCurlBase',
>> > 'data': { '*cookie': 'str',
>> > - '*cookie-secret': 'str'} }
>> > + '*cookie-secret': 'str',
>> > + '*force-range': 'bool'} }
>> >
>> > ##
>> > # @BlockdevOptionsCurlHttps:
>> > @@ -4605,13 +4610,18 @@
>> > # @cookie-secret: ID of a QCryptoSecret object providing the cookie
>> > # data in a secure way. See @cookie for the format. (since 2.10)
>> > #
>> > +# @force-range: Don't issue a HEAD HTTP request to discover if the
>> > +# backend supports range requests and rely only on GET requests.
>> > +# This is especially useful for S3 presigned URLs. (since 11.0)
>> > +#
>> > # Since: 2.9
>> > ##
>>
>> @force-range is is duplicated between BlockdevOptionsCurlHttp and
>> BlockdevOptionsCurlHttps. @cookie and @cookie-secret is already
>> duplicated before the patch. Time to factor out a common base type?
>
> This would be only on the QAPI ? looking something like:
>
> ```
> { 'struct': 'BlockdevOptionsCurlHttps',
> - 'base': 'BlockdevOptionsCurlBase',
> - 'data': { '*cookie': 'str',
> - '*sslverify': 'bool',
> - '*cookie-secret': 'str',
> - '*force-range': 'bool'} }
> + 'base': 'BlockdevOptionsCurlHttp',
> + 'data': { '*sslverify': 'bool' } }
> ```
Looks good to me.
> ? Would you rather see this in a separate commit or is the same patch OK
> ?
I'd prefer a separate commit.
© 2016 - 2026 Red Hat, Inc.