qobject: Accept "%"PRId64 in qobject_from_jsonf()

[Qemu-devel] [PATCH] qobject: Accept "%"PRId64 in qobject_from_jsonf()

Posted by Eric Blake 8 years, 6 months ago

Commit 1792d7d0 was written because PRId64 expands to non-portable
crap for some libc, and we had testsuite failures on Mac OS as a
result.  This in turn makes it difficult to rely on the obvious
conversions of 64-bit values into JSON, requiring things such as
casting int64_t to long long so we can use a reliable %lld and
still keep -Wformat happy.  So now it's time to fix that.

We are already lexing %I64d (hello mingw); now expand the lexer
to parse %qd (hello Mac OS). In the process, we can drop one
state: IN_ESCAPE_I64 is redundant with IN_ESCAPE_LL.  And fix a
comment that mistakenly omitted %u as a supported escape.

Next, tweak the parser to accept the exact spelling of PRId64
regardless of what it expands to (note that there are are now dead
branches in the 'if' chain for some platforms, like glibc; but all
branches are necessary on other platforms).  We are at least safe
in that we are parsing the correct size 32-bit or a 64-bit quantity
on whatever branch we end up in.  And of course, update the
testsuite for coverage of the feature.

Finally, update configure to barf on any libc that uses yet
another form of PRId64 that we have not yet coded for, so that we
can once again update json-lexer.c to cater to those quirks (my
guess? we might see %jd next).  Yes, the only way I could find
to quickly do and still work when cross-compiling is to grep a
compiled binary; C does not make it easy to turn a string constant
into an integer constant, let along make preprocessor decisions;
and even parsing '$CC -E' output is fragile, since 64-bit glibc
pre-processes as "l" "d" rather than "ld".  I'm assuming that
'strings' is portable enough during configure.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 configure             | 21 +++++++++++++++++++++
 qobject/json-lexer.c  | 11 +++--------
 qobject/json-parser.c | 10 ++++++----
 tests/check-qjson.c   |  7 +++++++
 4 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/configure b/configure
index 6b52e19ee3..b810ff970d 100755
--- a/configure
+++ b/configure
@@ -3239,6 +3239,27 @@ for i in $glib_modules; do
     fi
 done

+# Sanity check that we can parse PRId64 and friends in json-lexer.c
+# (Sadly, the "easiest" way to do this is to grep the compiled binary)
+cat > $TMPC <<EOF
+#include <inttypes.h>
+int main(void) {
+    static const char findme[] = "UnLiKeLyToClAsH_" PRId64;
+    return !findme[0];
+}
+EOF
+if ! compile_prog "$CFLAGS" "$LIBS" ; then
+    error_exit "can't determine value of PRId64"
+fi
+nl='
+'
+case $(strings $TMPE | grep ^UnLiKeLyToClAsH) in
+    '' | *"$nl"* ) error_exit "can't determine value of PRId64" ;;
+    *_ld | *_lld | *_I64d | *_qd) ;;
+    *) error_exit "unexepected value of PRId64, please add %$(strings $TMPE |
+       sed -n s/^UnLiKeLyToClAsH_//p) support to json-lexer.c" ;;
+esac
+
 # Sanity check that the current size_t matches the
 # size that glib thinks it should be. This catches
 # problems on multi-arch where people try to build
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 980ba159d6..98b1ec8e35 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -31,7 +31,7 @@
  *
  * Extension for vararg handling in JSON construction:
  *
- * %((l|ll|I64)?d|[ipsf])
+ * %(PRI[du]64|(l|ll)?[ud]|[ipsf])
  *
  */

@@ -63,7 +63,6 @@ enum json_lexer_state {
     IN_ESCAPE_LL,
     IN_ESCAPE_I,
     IN_ESCAPE_I6,
-    IN_ESCAPE_I64,
     IN_WHITESPACE,
     IN_START,
 };
@@ -236,13 +235,8 @@ static const uint8_t json_lexer[][256] =  {
         ['u'] = JSON_ESCAPE,
     },

-    [IN_ESCAPE_I64] = {
-        ['d'] = JSON_ESCAPE,
-        ['u'] = JSON_ESCAPE,
-    },
-
     [IN_ESCAPE_I6] = {
-        ['4'] = IN_ESCAPE_I64,
+        ['4'] = IN_ESCAPE_LL,
     },

     [IN_ESCAPE_I] = {
@@ -257,6 +251,7 @@ static const uint8_t json_lexer[][256] =  {
         ['u'] = JSON_ESCAPE,
         ['f'] = JSON_ESCAPE,
         ['l'] = IN_ESCAPE_L,
+        ['q'] = IN_ESCAPE_LL,
         ['I'] = IN_ESCAPE_I,
     },

diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 7a417f20cd..f01e97fc6b 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -470,15 +470,17 @@ static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
         return QOBJECT(qnum_from_int(va_arg(*ap, int)));
     } else if (!strcmp(token->str, "%ld")) {
         return QOBJECT(qnum_from_int(va_arg(*ap, long)));
-    } else if (!strcmp(token->str, "%lld") ||
-               !strcmp(token->str, "%I64d")) {
+    } else if (!strcmp(token->str, "%" PRId64)) {
+        return QOBJECT(qnum_from_int(va_arg(*ap, int64_t)));
+    } else if (!strcmp(token->str, "%lld")) {
         return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
     } else if (!strcmp(token->str, "%u")) {
         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
     } else if (!strcmp(token->str, "%lu")) {
         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
-    } else if (!strcmp(token->str, "%llu") ||
-               !strcmp(token->str, "%I64u")) {
+    } else if (!strcmp(token->str, "%" PRIu64)) {
+        return QOBJECT(qnum_from_uint(va_arg(*ap, uint64_t)));
+    } else if (!strcmp(token->str, "%llu")) {
         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));
     } else if (!strcmp(token->str, "%s")) {
         return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 53f2275b9b..d55d6d9ed1 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -990,8 +990,10 @@ static void vararg_number(void)
     QNum *qnum;
     int value = 0x2342;
     long long value_ll = 0x2342342343LL;
+    uint64_t value_u = UINT64_C(0x2342342343);
     double valuef = 2.323423423;
     int64_t val;
+    uint64_t uval;

     qnum = qobject_to_qnum(qobject_from_jsonf("%d", value));
     g_assert(qnum_get_try_int(qnum, &val));
@@ -1003,6 +1005,11 @@ static void vararg_number(void)
     g_assert_cmpint(val, ==, value_ll);
     QDECREF(qnum);

+    qnum = qobject_to_qnum(qobject_from_jsonf("%" PRIu64, value_u));
+    g_assert(qnum_get_try_uint(qnum, &uval));
+    g_assert_cmpint(uval, ==, value_u);
+    QDECREF(qnum);
+
     qnum = qobject_to_qnum(qobject_from_jsonf("%f", valuef));
     g_assert(qnum_get_double(qnum) == valuef);
     QDECREF(qnum);
-- 
2.13.3

Re: [Qemu-devel] [PATCH] qobject: Accept "%"PRId64 in qobject_from_jsonf()

Posted by Markus Armbruster 8 years, 6 months ago

Eric Blake <eblake@redhat.com> writes:

> Commit 1792d7d0 was written because PRId64 expands to non-portable
> crap for some libc, and we had testsuite failures on Mac OS as a
> result.  This in turn makes it difficult to rely on the obvious
> conversions of 64-bit values into JSON, requiring things such as
> casting int64_t to long long so we can use a reliable %lld and
> still keep -Wformat happy.  So now it's time to fix that.
>
> We are already lexing %I64d (hello mingw); now expand the lexer
> to parse %qd (hello Mac OS). In the process, we can drop one
> state: IN_ESCAPE_I64 is redundant with IN_ESCAPE_LL.  And fix a
> comment that mistakenly omitted %u as a supported escape.
>
> Next, tweak the parser to accept the exact spelling of PRId64
> regardless of what it expands to (note that there are are now dead
> branches in the 'if' chain for some platforms, like glibc; but all
> branches are necessary on other platforms).  We are at least safe
> in that we are parsing the correct size 32-bit or a 64-bit quantity
> on whatever branch we end up in.  And of course, update the
> testsuite for coverage of the feature.
>
> Finally, update configure to barf on any libc that uses yet
> another form of PRId64 that we have not yet coded for, so that we
> can once again update json-lexer.c to cater to those quirks (my
> guess? we might see %jd next).  Yes, the only way I could find
> to quickly do and still work when cross-compiling is to grep a
> compiled binary; C does not make it easy to turn a string constant
> into an integer constant, let along make preprocessor decisions;
> and even parsing '$CC -E' output is fragile, since 64-bit glibc
> pre-processes as "l" "d" rather than "ld".  I'm assuming that
> 'strings' is portable enough during configure.
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>  configure             | 21 +++++++++++++++++++++
>  qobject/json-lexer.c  | 11 +++--------
>  qobject/json-parser.c | 10 ++++++----
>  tests/check-qjson.c   |  7 +++++++
>  4 files changed, 37 insertions(+), 12 deletions(-)
>
> diff --git a/configure b/configure
> index 6b52e19ee3..b810ff970d 100755
> --- a/configure
> +++ b/configure
> @@ -3239,6 +3239,27 @@ for i in $glib_modules; do
>      fi
>  done
>
> +# Sanity check that we can parse PRId64 and friends in json-lexer.c
> +# (Sadly, the "easiest" way to do this is to grep the compiled binary)
> +cat > $TMPC <<EOF
> +#include <inttypes.h>
> +int main(void) {
> +    static const char findme[] = "UnLiKeLyToClAsH_" PRId64;
> +    return !findme[0];
> +}
> +EOF
> +if ! compile_prog "$CFLAGS" "$LIBS" ; then
> +    error_exit "can't determine value of PRId64"
> +fi
> +nl='
> +'
> +case $(strings $TMPE | grep ^UnLiKeLyToClAsH) in
> +    '' | *"$nl"* ) error_exit "can't determine value of PRId64" ;;
> +    *_ld | *_lld | *_I64d | *_qd) ;;
> +    *) error_exit "unexepected value of PRId64, please add %$(strings $TMPE |
> +       sed -n s/^UnLiKeLyToClAsH_//p) support to json-lexer.c" ;;
> +esac
> +

Why is this easier or more robust than examining output of the
preprocessor?  Hmm, you explain it in the commit message.  I think you
should also (briefly!) explain it in the "Sadly" comment.

>  # Sanity check that the current size_t matches the
>  # size that glib thinks it should be. This catches
>  # problems on multi-arch where people try to build
> diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
> index 980ba159d6..98b1ec8e35 100644
> --- a/qobject/json-lexer.c
> +++ b/qobject/json-lexer.c
> @@ -31,7 +31,7 @@
>   *
>   * Extension for vararg handling in JSON construction:
>   *
> - * %((l|ll|I64)?d|[ipsf])
> + * %(PRI[du]64|(l|ll)?[ud]|[ipsf])

Confusing.  The lexer accepts more than that, but parse_escape() filters
it out.  Need a comment explaining what, because the latter isn't
locally obvious.

>   *
>   */
>
> @@ -63,7 +63,6 @@ enum json_lexer_state {
>      IN_ESCAPE_LL,
>      IN_ESCAPE_I,
>      IN_ESCAPE_I6,
> -    IN_ESCAPE_I64,
>      IN_WHITESPACE,
>      IN_START,
>  };
> @@ -236,13 +235,8 @@ static const uint8_t json_lexer[][256] =  {
>          ['u'] = JSON_ESCAPE,
>      },
>
> -    [IN_ESCAPE_I64] = {
> -        ['d'] = JSON_ESCAPE,
> -        ['u'] = JSON_ESCAPE,
> -    },
> -
>      [IN_ESCAPE_I6] = {
> -        ['4'] = IN_ESCAPE_I64,
> +        ['4'] = IN_ESCAPE_LL,
>      },
>
>      [IN_ESCAPE_I] = {
> @@ -257,6 +251,7 @@ static const uint8_t json_lexer[][256] =  {
>          ['u'] = JSON_ESCAPE,
>          ['f'] = JSON_ESCAPE,
>          ['l'] = IN_ESCAPE_L,
> +        ['q'] = IN_ESCAPE_LL,
>          ['I'] = IN_ESCAPE_I,
>      },
>

IN_ESCAPE_LL becomes a bit of a misnomer.  IN_ESCAPE_DU?

> diff --git a/qobject/json-parser.c b/qobject/json-parser.c
> index 7a417f20cd..f01e97fc6b 100644
> --- a/qobject/json-parser.c
> +++ b/qobject/json-parser.c
> @@ -470,15 +470,17 @@ static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
>          return QOBJECT(qnum_from_int(va_arg(*ap, int)));
>      } else if (!strcmp(token->str, "%ld")) {
>          return QOBJECT(qnum_from_int(va_arg(*ap, long)));
> -    } else if (!strcmp(token->str, "%lld") ||
> -               !strcmp(token->str, "%I64d")) {
> +    } else if (!strcmp(token->str, "%" PRId64)) {
> +        return QOBJECT(qnum_from_int(va_arg(*ap, int64_t)));
> +    } else if (!strcmp(token->str, "%lld")) {
>          return QOBJECT(qnum_from_int(va_arg(*ap, long long)));

Let's do "ll" before PRId64.

>      } else if (!strcmp(token->str, "%u")) {
>          return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
>      } else if (!strcmp(token->str, "%lu")) {
>          return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
> -    } else if (!strcmp(token->str, "%llu") ||
> -               !strcmp(token->str, "%I64u")) {
> +    } else if (!strcmp(token->str, "%" PRIu64)) {
> +        return QOBJECT(qnum_from_uint(va_arg(*ap, uint64_t)));
> +    } else if (!strcmp(token->str, "%llu")) {
>          return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));

Likewise.

>      } else if (!strcmp(token->str, "%s")) {
>          return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
> diff --git a/tests/check-qjson.c b/tests/check-qjson.c
> index 53f2275b9b..d55d6d9ed1 100644
> --- a/tests/check-qjson.c
> +++ b/tests/check-qjson.c
> @@ -990,8 +990,10 @@ static void vararg_number(void)
>      QNum *qnum;
>      int value = 0x2342;
>      long long value_ll = 0x2342342343LL;
> +    uint64_t value_u = UINT64_C(0x2342342343);

Is UINT64_C() really necessary here?

Call the variable value_u64?

>      double valuef = 2.323423423;
>      int64_t val;
> +    uint64_t uval;
>
>      qnum = qobject_to_qnum(qobject_from_jsonf("%d", value));
>      g_assert(qnum_get_try_int(qnum, &val));
> @@ -1003,6 +1005,11 @@ static void vararg_number(void)
>      g_assert_cmpint(val, ==, value_ll);
>      QDECREF(qnum);
>
> +    qnum = qobject_to_qnum(qobject_from_jsonf("%" PRIu64, value_u));
> +    g_assert(qnum_get_try_uint(qnum, &uval));
> +    g_assert_cmpint(uval, ==, value_u);
> +    QDECREF(qnum);
> +
>      qnum = qobject_to_qnum(qobject_from_jsonf("%f", valuef));
>      g_assert(qnum_get_double(qnum) == valuef);
>      QDECREF(qnum);

Re: [Qemu-devel] [PATCH] qobject: Accept "%"PRId64 in qobject_from_jsonf()

Posted by Eric Blake 8 years, 6 months ago

On 07/24/2017 04:06 AM, Markus Armbruster wrote:
> Eric Blake <eblake@redhat.com> writes:
> 
>> Commit 1792d7d0 was written because PRId64 expands to non-portable
>> crap for some libc, and we had testsuite failures on Mac OS as a
>> result.  This in turn makes it difficult to rely on the obvious
>> conversions of 64-bit values into JSON, requiring things such as
>> casting int64_t to long long so we can use a reliable %lld and
>> still keep -Wformat happy.  So now it's time to fix that.
>>

>> +case $(strings $TMPE | grep ^UnLiKeLyToClAsH) in
>> +    '' | *"$nl"* ) error_exit "can't determine value of PRId64" ;;
>> +    *_ld | *_lld | *_I64d | *_qd) ;;
>> +    *) error_exit "unexepected value of PRId64, please add %$(strings $TMPE |
>> +       sed -n s/^UnLiKeLyToClAsH_//p) support to json-lexer.c" ;;
>> +esac
>> +
> 
> Why is this easier or more robust than examining output of the
> preprocessor?  Hmm, you explain it in the commit message.  I think you
> should also (briefly!) explain it in the "Sadly" comment.

Okay.  (Something along the lines of: We can't guarantee if the
preprocessor will produce "ld" or "l" "d", nor even if the expansion
will occur on the same line as any marker)

I also wonder if I should anchor some \n in the magic bytes being
searched for in the binary, so that if 'strings' fails (which may indeed
be the case for a mingw binary), then falling back to raw grep may also
have a chance.  But first, I'm hoping to get some patchew feedback first
if one of the build platforms has problems with the current attempt.


>> +++ b/qobject/json-lexer.c
>> @@ -31,7 +31,7 @@
>>   *
>>   * Extension for vararg handling in JSON construction:
>>   *
>> - * %((l|ll|I64)?d|[ipsf])
>> + * %(PRI[du]64|(l|ll)?[ud]|[ipsf])
> 
> Confusing.  The lexer accepts more than that, but parse_escape() filters
> it out.  Need a comment explaining what, because the latter isn't
> locally obvious.

True - we lex all known forms, and then only parse the current
platform's form.  Will improve the comment.


>>      } else if (!strcmp(token->str, "%ld")) {
>>          return QOBJECT(qnum_from_int(va_arg(*ap, long)));
>> -    } else if (!strcmp(token->str, "%lld") ||
>> -               !strcmp(token->str, "%I64d")) {
>> +    } else if (!strcmp(token->str, "%" PRId64)) {
>> +        return QOBJECT(qnum_from_int(va_arg(*ap, int64_t)));
>> +    } else if (!strcmp(token->str, "%lld")) {
>>          return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
> 
> Let's do "ll" before PRId64.

Sure.


>> +++ b/tests/check-qjson.c
>> @@ -990,8 +990,10 @@ static void vararg_number(void)
>>      QNum *qnum;
>>      int value = 0x2342;
>>      long long value_ll = 0x2342342343LL;
>> +    uint64_t value_u = UINT64_C(0x2342342343);
> 
> Is UINT64_C() really necessary here?

Not as long as none of the compilers we use complains about uint64_t x =
1ULL.  I'll simplify, then we can uglify if a compiler complains.

> 
> Call the variable value_u64?

Yes.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH] qobject: Accept "%"PRId64 in qobject_from_jsonf()

Posted by Markus Armbruster 8 years, 6 months ago

Eric Blake <eblake@redhat.com> writes:

> On 07/24/2017 04:06 AM, Markus Armbruster wrote:
>> Eric Blake <eblake@redhat.com> writes:
>> 
>>> Commit 1792d7d0 was written because PRId64 expands to non-portable
>>> crap for some libc, and we had testsuite failures on Mac OS as a
>>> result.  This in turn makes it difficult to rely on the obvious
>>> conversions of 64-bit values into JSON, requiring things such as
>>> casting int64_t to long long so we can use a reliable %lld and
>>> still keep -Wformat happy.  So now it's time to fix that.
>>>
>
>>> +case $(strings $TMPE | grep ^UnLiKeLyToClAsH) in
>>> +    '' | *"$nl"* ) error_exit "can't determine value of PRId64" ;;
>>> +    *_ld | *_lld | *_I64d | *_qd) ;;
>>> +    *) error_exit "unexepected value of PRId64, please add %$(strings $TMPE |
>>> +       sed -n s/^UnLiKeLyToClAsH_//p) support to json-lexer.c" ;;
>>> +esac
>>> +
>> 
>> Why is this easier or more robust than examining output of the
>> preprocessor?  Hmm, you explain it in the commit message.  I think you
>> should also (briefly!) explain it in the "Sadly" comment.
>
> Okay.  (Something along the lines of: We can't guarantee if the
> preprocessor will produce "ld" or "l" "d", nor even if the expansion
> will occur on the same line as any marker)

Yes.

> I also wonder if I should anchor some \n in the magic bytes being
> searched for in the binary, so that if 'strings' fails (which may indeed
> be the case for a mingw binary), then falling back to raw grep may also
> have a chance.  But first, I'm hoping to get some patchew feedback first
> if one of the build platforms has problems with the current attempt.
>
>
>>> +++ b/qobject/json-lexer.c
>>> @@ -31,7 +31,7 @@
>>>   *
>>>   * Extension for vararg handling in JSON construction:
>>>   *
>>> - * %((l|ll|I64)?d|[ipsf])
>>> + * %(PRI[du]64|(l|ll)?[ud]|[ipsf])
>> 
>> Confusing.  The lexer accepts more than that, but parse_escape() filters
>> it out.  Need a comment explaining what, because the latter isn't
>> locally obvious.
>
> True - we lex all known forms, and then only parse the current
> platform's form.  Will improve the comment.
>
>
>>>      } else if (!strcmp(token->str, "%ld")) {
>>>          return QOBJECT(qnum_from_int(va_arg(*ap, long)));
>>> -    } else if (!strcmp(token->str, "%lld") ||
>>> -               !strcmp(token->str, "%I64d")) {
>>> +    } else if (!strcmp(token->str, "%" PRId64)) {
>>> +        return QOBJECT(qnum_from_int(va_arg(*ap, int64_t)));
>>> +    } else if (!strcmp(token->str, "%lld")) {
>>>          return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
>> 
>> Let's do "ll" before PRId64.
>
> Sure.
>
>
>>> +++ b/tests/check-qjson.c
>>> @@ -990,8 +990,10 @@ static void vararg_number(void)
>>>      QNum *qnum;
>>>      int value = 0x2342;
>>>      long long value_ll = 0x2342342343LL;
>>> +    uint64_t value_u = UINT64_C(0x2342342343);
>> 
>> Is UINT64_C() really necessary here?
>
> Not as long as none of the compilers we use complains about uint64_t x =
> 1ULL.  I'll simplify, then we can uglify if a compiler complains.

The type of plain 0x2342342343 is the first of int, unsigned int, long
int, unsigned long int, long long int, unsigned long long int that can
represent it.  In practice, either long or long long.  Can't see
anything for compilers to whine about there, but I've been wrong before.

>> Call the variable value_u64?
>
> Yes.