[PATCH 2/5] util/uri: Simplify uri_string_unescape()

Thomas Huth posted 5 patches 10 months, 1 week ago
There is a newer version of this series
[PATCH 2/5] util/uri: Simplify uri_string_unescape()
Posted by Thomas Huth 10 months, 1 week ago
uri_string_unescape() basically does the same as the glib function
g_uri_unescape_string(), with just an additional length parameter.
So we can simplify this function a lot by limiting the length with
g_strndup() first and then by calling g_uri_unescape_string() instead
of walking through the string manually.

Suggested-by: Stefan Weil <stefan.weil@weilnetz.de>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 util/uri.c | 49 +++----------------------------------------------
 1 file changed, 3 insertions(+), 46 deletions(-)

diff --git a/util/uri.c b/util/uri.c
index 33b6c7214e..2a75f535ba 100644
--- a/util/uri.c
+++ b/util/uri.c
@@ -1561,15 +1561,6 @@ done_cd:
     return 0;
 }
 
-static int is_hex(char c)
-{
-    if (((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) ||
-        ((c >= 'A') && (c <= 'F'))) {
-        return 1;
-    }
-    return 0;
-}
-
 /**
  * uri_string_unescape:
  * @str:  the string to unescape
@@ -1585,8 +1576,7 @@ static int is_hex(char c)
  */
 char *uri_string_unescape(const char *str, int len)
 {
-    char *ret, *out;
-    const char *in;
+    g_autofree char *lstr = NULL;
 
     if (str == NULL) {
         return NULL;
@@ -1594,42 +1584,9 @@ char *uri_string_unescape(const char *str, int len)
     if (len <= 0) {
         len = strlen(str);
     }
-    if (len < 0) {
-        return NULL;
-    }
-
-    ret = g_malloc(len + 1);
+    lstr = g_strndup(str, len);
 
-    in = str;
-    out = ret;
-    while (len > 0) {
-        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
-            in++;
-            if ((*in >= '0') && (*in <= '9')) {
-                *out = (*in - '0');
-            } else if ((*in >= 'a') && (*in <= 'f')) {
-                *out = (*in - 'a') + 10;
-            } else if ((*in >= 'A') && (*in <= 'F')) {
-                *out = (*in - 'A') + 10;
-            }
-            in++;
-            if ((*in >= '0') && (*in <= '9')) {
-                *out = *out * 16 + (*in - '0');
-            } else if ((*in >= 'a') && (*in <= 'f')) {
-                *out = *out * 16 + (*in - 'a') + 10;
-            } else if ((*in >= 'A') && (*in <= 'F')) {
-                *out = *out * 16 + (*in - 'A') + 10;
-            }
-            in++;
-            len -= 3;
-            out++;
-        } else {
-            *out++ = *in++;
-            len--;
-        }
-    }
-    *out = 0;
-    return ret;
+    return g_uri_unescape_string(lstr, NULL);
 }
 
 /**
-- 
2.43.0
Re: [PATCH 2/5] util/uri: Simplify uri_string_unescape()
Posted by Paolo Bonzini 10 months, 1 week ago
Il lun 22 gen 2024, 20:18 Thomas Huth <thuth@redhat.com> ha scritto:

> uri_string_unescape() basically does the same as the glib function
> g_uri_unescape_string(), with just an additional length parameter.
>

You can replace it altogether with g_uri_unescape_segment.

Paolo

> So we can simplify this function a lot by limiting the length with
> g_strndup() first and then by calling g_uri_unescape_string() instead
> of walking through the string manually.
>
> Suggested-by: Stefan Weil <stefan.weil@weilnetz.de>
> Signed-off-by: Thomas Huth <thuth@redhat.com>
> ---
>  util/uri.c | 49 +++----------------------------------------------
>  1 file changed, 3 insertions(+), 46 deletions(-)
>
> diff --git a/util/uri.c b/util/uri.c
> index 33b6c7214e..2a75f535ba 100644
> --- a/util/uri.c
> +++ b/util/uri.c
> @@ -1561,15 +1561,6 @@ done_cd:
>      return 0;
>  }
>
> -static int is_hex(char c)
> -{
> -    if (((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) ||
> -        ((c >= 'A') && (c <= 'F'))) {
> -        return 1;
> -    }
> -    return 0;
> -}
> -
>  /**
>   * uri_string_unescape:
>   * @str:  the string to unescape
> @@ -1585,8 +1576,7 @@ static int is_hex(char c)
>   */
>  char *uri_string_unescape(const char *str, int len)
>  {
> -    char *ret, *out;
> -    const char *in;
> +    g_autofree char *lstr = NULL;
>
>      if (str == NULL) {
>          return NULL;
> @@ -1594,42 +1584,9 @@ char *uri_string_unescape(const char *str, int len)
>      if (len <= 0) {
>          len = strlen(str);
>      }
> -    if (len < 0) {
> -        return NULL;
> -    }
> -
> -    ret = g_malloc(len + 1);
> +    lstr = g_strndup(str, len);
>
> -    in = str;
> -    out = ret;
> -    while (len > 0) {
> -        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
> -            in++;
> -            if ((*in >= '0') && (*in <= '9')) {
> -                *out = (*in - '0');
> -            } else if ((*in >= 'a') && (*in <= 'f')) {
> -                *out = (*in - 'a') + 10;
> -            } else if ((*in >= 'A') && (*in <= 'F')) {
> -                *out = (*in - 'A') + 10;
> -            }
> -            in++;
> -            if ((*in >= '0') && (*in <= '9')) {
> -                *out = *out * 16 + (*in - '0');
> -            } else if ((*in >= 'a') && (*in <= 'f')) {
> -                *out = *out * 16 + (*in - 'a') + 10;
> -            } else if ((*in >= 'A') && (*in <= 'F')) {
> -                *out = *out * 16 + (*in - 'A') + 10;
> -            }
> -            in++;
> -            len -= 3;
> -            out++;
> -        } else {
> -            *out++ = *in++;
> -            len--;
> -        }
> -    }
> -    *out = 0;
> -    return ret;
> +    return g_uri_unescape_string(lstr, NULL);
>  }
>
>  /**
> --
> 2.43.0
>
>
Re: [PATCH 2/5] util/uri: Simplify uri_string_unescape()
Posted by Thomas Huth 10 months, 1 week ago
On 23/01/2024 11.25, Paolo Bonzini wrote:
> 
> 
> Il lun 22 gen 2024, 20:18 Thomas Huth <thuth@redhat.com> ha scritto:
> 
>     uri_string_unescape() basically does the same as the glib function
>     g_uri_unescape_string(), with just an additional length parameter.
> 
> 
> You can replace it altogether with g_uri_unescape_segment.

Oh, nice, I indeed missed that while looking at the glib docs! Thanks, I'll 
give it a try...

  Thomas
Re: [PATCH 2/5] util/uri: Simplify uri_string_unescape()
Posted by Stefan Weil 10 months, 1 week ago
Am 22.01.24 um 20:17 schrieb Thomas Huth:

> uri_string_unescape() basically does the same as the glib function
> g_uri_unescape_string(), with just an additional length parameter.
> So we can simplify this function a lot by limiting the length with
> g_strndup() first and then by calling g_uri_unescape_string() instead
> of walking through the string manually.
>
> Suggested-by: Stefan Weil <stefan.weil@weilnetz.de>

Can my e-mail address be replaced by another one (sw@weilnetz.de)?

> Signed-off-by: Thomas Huth <thuth@redhat.com>
> ---
>   util/uri.c | 49 +++----------------------------------------------
>   1 file changed, 3 insertions(+), 46 deletions(-)
>
> diff --git a/util/uri.c b/util/uri.c
> index 33b6c7214e..2a75f535ba 100644
> --- a/util/uri.c
> +++ b/util/uri.c
> @@ -1561,15 +1561,6 @@ done_cd:
>       return 0;
>   }
>   
> -static int is_hex(char c)
> -{
> -    if (((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) ||
> -        ((c >= 'A') && (c <= 'F'))) {
> -        return 1;
> -    }
> -    return 0;
> -}
> -
>   /**
>    * uri_string_unescape:
>    * @str:  the string to unescape
> @@ -1585,8 +1576,7 @@ static int is_hex(char c)
>    */
>   char *uri_string_unescape(const char *str, int len)
>   {
> -    char *ret, *out;
> -    const char *in;
> +    g_autofree char *lstr = NULL;


Is it necessary to assign NULL? It does not look so.


>   
>       if (str == NULL) {
>           return NULL;
> @@ -1594,42 +1584,9 @@ char *uri_string_unescape(const char *str, int len)
>       if (len <= 0) {
>           len = strlen(str);
>       }
> -    if (len < 0) {
> -        return NULL;
> -    }
> -
> -    ret = g_malloc(len + 1);
> +    lstr = g_strndup(str, len);
>   
> -    in = str;
> -    out = ret;
> -    while (len > 0) {
> -        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
> -            in++;
> -            if ((*in >= '0') && (*in <= '9')) {
> -                *out = (*in - '0');
> -            } else if ((*in >= 'a') && (*in <= 'f')) {
> -                *out = (*in - 'a') + 10;
> -            } else if ((*in >= 'A') && (*in <= 'F')) {
> -                *out = (*in - 'A') + 10;
> -            }
> -            in++;
> -            if ((*in >= '0') && (*in <= '9')) {
> -                *out = *out * 16 + (*in - '0');
> -            } else if ((*in >= 'a') && (*in <= 'f')) {
> -                *out = *out * 16 + (*in - 'a') + 10;
> -            } else if ((*in >= 'A') && (*in <= 'F')) {
> -                *out = *out * 16 + (*in - 'A') + 10;
> -            }
> -            in++;
> -            len -= 3;
> -            out++;
> -        } else {
> -            *out++ = *in++;
> -            len--;
> -        }
> -    }
> -    *out = 0;
> -    return ret;
> +    return g_uri_unescape_string(lstr, NULL);
>   }
>   
>   /**


Thank you.

Reviewed-by: Stefan Weil <sw@weilnetz.de>

Re: [PATCH 2/5] util/uri: Simplify uri_string_unescape()
Posted by Thomas Huth 10 months, 1 week ago
On 22/01/2024 22.22, Stefan Weil wrote:
> Am 22.01.24 um 20:17 schrieb Thomas Huth:
> 
>> uri_string_unescape() basically does the same as the glib function
>> g_uri_unescape_string(), with just an additional length parameter.
>> So we can simplify this function a lot by limiting the length with
>> g_strndup() first and then by calling g_uri_unescape_string() instead
>> of walking through the string manually.
>>
>> Suggested-by: Stefan Weil<stefan.weil@weilnetz.de>
> 
> Can my e-mail address be replaced by another one (sw@weilnetz.de)?

Sure! ... not sure where I copy-n-pasted the other one from ... sorry for that.

>> @@ -1585,8 +1576,7 @@ static int is_hex(char c)
>>    */
>>   char *uri_string_unescape(const char *str, int len)
>>   {
>> -    char *ret, *out;
>> -    const char *in;
>> +    g_autofree char *lstr = NULL;
> 
> 
> Is it necessary to assign NULL? It does not look so.

Yes, it's necessary for the early "return NULL" statement below. Since it's
a g_autofree variable, it must either be set to a valid allocated buffer or
NULL before returning.

>>   
>>       if (str == NULL) {
>>           return NULL;
>> @@ -1594,42 +1584,9 @@ char *uri_string_unescape(const char *str, int len)
>>       if (len <= 0) {
>>           len = strlen(str);
>>       }
>> -    if (len < 0) {
>> -        return NULL;
>> -    }
>> -
>> -    ret = g_malloc(len + 1);
>> +    lstr = g_strndup(str, len);
>>   
>> -    in = str;
>> -    out = ret;
>> -    while (len > 0) {
>> -        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
>> -            in++;
>> -            if ((*in >= '0') && (*in <= '9')) {
>> -                *out = (*in - '0');
>> -            } else if ((*in >= 'a') && (*in <= 'f')) {
>> -                *out = (*in - 'a') + 10;
>> -            } else if ((*in >= 'A') && (*in <= 'F')) {
>> -                *out = (*in - 'A') + 10;
>> -            }
>> -            in++;
>> -            if ((*in >= '0') && (*in <= '9')) {
>> -                *out = *out * 16 + (*in - '0');
>> -            } else if ((*in >= 'a') && (*in <= 'f')) {
>> -                *out = *out * 16 + (*in - 'a') + 10;
>> -            } else if ((*in >= 'A') && (*in <= 'F')) {
>> -                *out = *out * 16 + (*in - 'A') + 10;
>> -            }
>> -            in++;
>> -            len -= 3;
>> -            out++;
>> -        } else {
>> -            *out++ = *in++;
>> -            len--;
>> -        }
>> -    }
>> -    *out = 0;
>> -    return ret;
>> +    return g_uri_unescape_string(lstr, NULL);
>>   }
>>   
>>   /**
> 
> 
> Thank you.
> 
> Reviewed-by: Stefan Weil <sw@weilnetz.de>

Thanks!

  Thomas