block/curl: rewrite http header parsing function

[PATCH] block/curl: rewrite http header parsing function

Posted by Michael Tokarev 1 year, 9 months ago

Existing code was long, unclear and twisty.

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
---
 block/curl.c | 44 ++++++++++++++++++--------------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/block/curl.c b/block/curl.c
index 419f7c89ef..9802d0319d 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
     BDRVCURLState *s = opaque;
     size_t realsize = size * nmemb;
-    const char *header = (char *)ptr;
-    const char *end = header + realsize;
-    const char *accept_ranges = "accept-ranges:";
-    const char *bytes = "bytes";
+    const char *p = ptr;
+    const char *end = p + realsize;
+    const char *t = "accept-ranges : bytes "; /* A lowercase template */
 
-    if (realsize >= strlen(accept_ranges)
-        && g_ascii_strncasecmp(header, accept_ranges,
-                               strlen(accept_ranges)) == 0) {
-
-        char *p = strchr(header, ':') + 1;
-
-        /* Skip whitespace between the header name and value. */
-        while (p < end && *p && g_ascii_isspace(*p)) {
-            p++;
-        }
-
-        if (end - p >= strlen(bytes)
-            && strncmp(p, bytes, strlen(bytes)) == 0) {
-
-            /* Check that there is nothing but whitespace after the value. */
-            p += strlen(bytes);
-            while (p < end && *p && g_ascii_isspace(*p)) {
-                p++;
-            }
-
-            if (p == end || !*p) {
-                s->accept_range = true;
+    /* check if header matches the "t" template */
+    for (;;) {
+        if (*t == ' ') { /* space in t matches any amount of isspace in p */
+            if (p < end && g_ascii_isspace(*p)) {
+                ++p;
+            } else {
+                ++t;
             }
+        } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
+            ++p, ++t;
+        } else {
+            break;
         }
     }
 
+    if (!*t && p == end) { /* if we managed to reach ends of both strings */
+        s->accept_range = true;
+    }
+
     return realsize;
 }
 
-- 
2.39.2

Re: [PATCH] block/curl: rewrite http header parsing function

Posted by Kevin Wolf 1 year, 8 months ago

Am 29.06.2024 um 16:25 hat Michael Tokarev geschrieben:
> Existing code was long, unclear and twisty.
> 
> Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
> ---
>  block/curl.c | 44 ++++++++++++++++++--------------------------
>  1 file changed, 18 insertions(+), 26 deletions(-)
> 
> diff --git a/block/curl.c b/block/curl.c
> index 419f7c89ef..9802d0319d 100644
> --- a/block/curl.c
> +++ b/block/curl.c
> @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
>  {
>      BDRVCURLState *s = opaque;
>      size_t realsize = size * nmemb;
> -    const char *header = (char *)ptr;
> -    const char *end = header + realsize;
> -    const char *accept_ranges = "accept-ranges:";
> -    const char *bytes = "bytes";
> +    const char *p = ptr;
> +    const char *end = p + realsize;
> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */

I don't think spaces between the field name and the colon are allowed
in the spec (and in the old code), only before and after the value.

> -    if (realsize >= strlen(accept_ranges)
> -        && g_ascii_strncasecmp(header, accept_ranges,
> -                               strlen(accept_ranges)) == 0) {
> -
> -        char *p = strchr(header, ':') + 1;
> -
> -        /* Skip whitespace between the header name and value. */
> -        while (p < end && *p && g_ascii_isspace(*p)) {
> -            p++;
> -        }
> -
> -        if (end - p >= strlen(bytes)
> -            && strncmp(p, bytes, strlen(bytes)) == 0) {
> -
> -            /* Check that there is nothing but whitespace after the value. */
> -            p += strlen(bytes);
> -            while (p < end && *p && g_ascii_isspace(*p)) {
> -                p++;
> -            }
> -
> -            if (p == end || !*p) {
> -                s->accept_range = true;
> +    /* check if header matches the "t" template */
> +    for (;;) {
> +        if (*t == ' ') { /* space in t matches any amount of isspace in p */
> +            if (p < end && g_ascii_isspace(*p)) {
> +                ++p;
> +            } else {
> +                ++t;
>              }
> +        } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
> +            ++p, ++t;
> +        } else {
> +            break;
>          }
>      }
>  
> +    if (!*t && p == end) { /* if we managed to reach ends of both strings */
> +        s->accept_range = true;
> +    }

Maybe make the generic comparison with a template a separate function
(maybe even in cutils.c?) so that curl_header_cb() essentially only has
something like this any more:

if (!qemu_memcasecmp_space(ptr, end, "accept-ranges: bytes ")) {
    s->accept_range = true;
}

(A better name for the function would be preferable, of course. Maybe
also a bool return value, but if it has a name related to memcmp() or
strcmp(), then 0 must mean it matches.)

Then this would really highlight the curl specific logic rather than the
string parser in curl_header_cb().

Kevin

Re: [PATCH] block/curl: rewrite http header parsing function

Posted by Vladimir Sementsov-Ogievskiy 1 year, 9 months ago

On 29.06.24 17:25, Michael Tokarev wrote:
> Existing code was long, unclear and twisty.
> 
> Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

> ---
>   block/curl.c | 44 ++++++++++++++++++--------------------------
>   1 file changed, 18 insertions(+), 26 deletions(-)
> 
> diff --git a/block/curl.c b/block/curl.c
> index 419f7c89ef..9802d0319d 100644
> --- a/block/curl.c
> +++ b/block/curl.c
> @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
>   {
>       BDRVCURLState *s = opaque;
>       size_t realsize = size * nmemb;
> -    const char *header = (char *)ptr;
> -    const char *end = header + realsize;
> -    const char *accept_ranges = "accept-ranges:";
> -    const char *bytes = "bytes";
> +    const char *p = ptr;
> +    const char *end = p + realsize;
> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */

Note: you make the parser less strict: you allow "bytes" to be uppercase (previously case-insensitivity was allowed only for "accept-ranges"), and you allow whitespace before the colon.

>   
> -    if (realsize >= strlen(accept_ranges)
> -        && g_ascii_strncasecmp(header, accept_ranges,
> -                               strlen(accept_ranges)) == 0) {
> -
> -        char *p = strchr(header, ':') + 1;
> -
> -        /* Skip whitespace between the header name and value. */
> -        while (p < end && *p && g_ascii_isspace(*p)) {
> -            p++;
> -        }
> -
> -        if (end - p >= strlen(bytes)
> -            && strncmp(p, bytes, strlen(bytes)) == 0) {
> -
> -            /* Check that there is nothing but whitespace after the value. */
> -            p += strlen(bytes);
> -            while (p < end && *p && g_ascii_isspace(*p)) {
> -                p++;
> -            }
> -
> -            if (p == end || !*p) {
> -                s->accept_range = true;
> +    /* check if header matches the "t" template */
> +    for (;;) {
> +        if (*t == ' ') { /* space in t matches any amount of isspace in p */
> +            if (p < end && g_ascii_isspace(*p)) {
> +                ++p;
> +            } else {
> +                ++t;
>               }
> +        } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
> +            ++p, ++t;
> +        } else {
> +            break;
>           }
>       }
>   
> +    if (!*t && p == end) { /* if we managed to reach ends of both strings */
> +        s->accept_range = true;
> +    }
> +
>       return realsize;
>   }
>   

-- 
Best regards,
Vladimir

Re: [PATCH] block/curl: rewrite http header parsing function

Posted by Michael Tokarev 1 year, 9 months ago

01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote:

>> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */
> 
> Note: you make the parser less strict: you allow "bytes" to be uppercase (previously case-insensitivity was allowed only for "accept-ranges"), and you allow whitespace before the colon.

Yes, exactly.

I should add this to the description (wanted to do that but forgot).
I'll update the patch (without re-sending) - hopefully it's okay to
keep your S-o-b :)

Thanks,

/mjt

-- 
GPG Key transition (from rsa2048 to rsa4096) since 2024-04-24.
New key: rsa4096/61AD3D98ECDF2C8E  9D8B E14E 3F2A 9DD7 9199  28F1 61AD 3D98 ECDF 2C8E
Old key: rsa2048/457CE0A0804465C5  6EE1 95D1 886E 8FFB 810D  4324 457C E0A0 8044 65C5
Transition statement: http://www.corpit.ru/mjt/gpg-transition-2024.txt

Re: [PATCH] block/curl: rewrite http header parsing function

Posted by Vladimir Sementsov-Ogievskiy 1 year, 9 months ago

On 01.07.24 09:55, Michael Tokarev wrote:
> 01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote:
> 
>>> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */
>>
>> Note: you make the parser less strict: you allow "bytes" to be uppercase (previously case-insensitivity was allowed only for "accept-ranges"), and you allow whitespace before the colon.
> 
> Yes, exactly.
> 
> I should add this to the description (wanted to do that but forgot).
> I'll update the patch (without re-sending) - hopefully it's okay to
> keep your S-o-b :)
> 

Of course!

-- 
Best regards,
Vladimir